diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
commit | cd749a9c07f1de2fb8affde90537efa4bc3e7c54 (patch) | |
tree | b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Transforms | |
parent | 72621d11de5b873f1695f391eb95f0b336c3d2d4 (diff) | |
download | FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.zip FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.tar.gz |
Update llvm to r84119.
Diffstat (limited to 'lib/Transforms')
86 files changed, 8170 insertions, 6252 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index d07f613..8000d0d 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -16,7 +16,7 @@ #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -32,7 +32,7 @@ namespace { HelloCounter++; std::string fname = F.getName(); EscapeString(fname); - cerr << "Hello: " << fname << "\n"; + errs() << "Hello: " << fname << "\n"; return false; } }; @@ -51,7 +51,7 @@ namespace { HelloCounter++; std::string fname = F.getName(); EscapeString(fname); - cerr << "Hello: " << fname << "\n"; + errs() << "Hello: " << fname << "\n"; return false; } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a612634..5b91f3d 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -36,16 +36,18 @@ #include "llvm/Module.h" #include "llvm/CallGraphSCCPass.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Compiler.h" #include <set> using namespace llvm; @@ -60,11 +62,10 @@ namespace { struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); - AU.addRequired<TargetData>(); CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass(&ID), maxElements(maxElements) {} @@ -73,11 +74,11 @@ namespace { typedef std::vector<uint64_t> IndicesVector; private: - bool PromoteArguments(CallGraphNode *CGN); + CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; - Function *DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, - SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + CallGraphNode *DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, + SmallPtrSet<Argument*, 8> &ByValArgsToTransform); /// The maximum number of elements to expand, or 0 for unlimited. unsigned maxElements; }; @@ -91,14 +92,17 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); } -bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { bool Changed = false, LocalChange; do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. for (unsigned i = 0, e = SCC.size(); i != e; ++i) - LocalChange |= PromoteArguments(SCC[i]); + if (CallGraphNode *CGN = PromoteArguments(SCC[i])) { + LocalChange = true; + SCC[i] = CGN; + } Changed |= LocalChange; // Remember that we changed something. } while (LocalChange); @@ -110,11 +114,11 @@ bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// -bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return false; + if (!F || !F->hasLocalLinkage()) return 0; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs; @@ -123,12 +127,12 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { I != E; ++I, ++ArgNo) if (isa<PointerType>(I->getType())) PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); - if (PointerArgs.empty()) return false; + if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. if (F->hasAddressTaken()) - return false; + return 0; // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. @@ -144,9 +148,9 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { - DOUT << "argpromotion disable promoting argument '" - << PtrArg->getName() << "' because it would require adding more " - << "than " << maxElements << " arguments to the function.\n"; + DEBUG(errs() << "argpromotion disable promoting argument '" + << PtrArg->getName() << "' because it would require adding more" + << " than " << maxElements << " arguments to the function.\n"); } else { // If all the elements are single-value types, we can promote it. bool AllSimple = true; @@ -173,13 +177,10 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false; - - Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform); + if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + return 0; - // Update the call graph to know that the function has been transformed. - getAnalysis<CallGraph>().changeFunction(F, NewF); - return true; + return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } /// IsAlwaysValidPointer - Return true if the specified pointer is always legal @@ -409,9 +410,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // to do. if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { - DOUT << "argpromotion not promoting argument '" - << Arg->getName() << "' because it would require adding more " - << "than " << maxElements << " arguments to the function.\n"; + DEBUG(errs() << "argpromotion not promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; @@ -432,7 +433,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - TargetData &TD = getAnalysis<TargetData>(); + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; // Without TargetData, assume the worst. for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to @@ -442,7 +444,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { const PointerType *LoadTy = cast<PointerType>(Load->getPointerOperand()->getType()); - unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType()); + unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) return false; // Pointer is invalidated! @@ -467,8 +469,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. -Function *ArgPromotion::DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, +CallGraphNode *ArgPromotion::DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as @@ -581,19 +583,24 @@ Function *ArgPromotion::DoPromotion(Function *F, bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; - Params.push_back(Type::Int32Ty); + Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); - // Create the new function body and insert it into the module... + // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); + + DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); + NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); @@ -606,6 +613,10 @@ Function *ArgPromotion::DoPromotion(Function *F, // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. @@ -636,9 +647,10 @@ Function *ArgPromotion::DoPromotion(Function *F, // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::Int32Ty, i); + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); @@ -662,7 +674,9 @@ Function *ArgPromotion::DoPromotion(Function *F, IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (isa<StructType>(ElTy) ? Type::Int32Ty : Type::Int64Ty); + const Type *IdxTy = (isa<StructType>(ElTy) ? + Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); @@ -679,7 +693,7 @@ Function *ArgPromotion::DoPromotion(Function *F, } if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::Int32Ty)); + Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { @@ -715,7 +729,8 @@ Function *ArgPromotion::DoPromotion(Function *F, AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. - CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); @@ -756,14 +771,16 @@ Function *ArgPromotion::DoPromotion(Function *F, const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::Int32Ty, i); - std::string Name = TheAlloca->getName()+"."+utostr(i); - Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, - Name, InsertPt); - I2->setName(I->getName()+"."+utostr(i)); + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = + GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + TheAlloca->getName()+"."+Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } @@ -792,8 +809,8 @@ Function *ArgPromotion::DoPromotion(Function *F, LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); - DOUT << "*** Promoted load of argument '" << I->getName() - << "' in function '" << F->getName() << "'\n"; + DEBUG(errs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; @@ -819,8 +836,8 @@ Function *ArgPromotion::DoPromotion(Function *F, NewName += ".val"; TheArg->setName(NewName); - DOUT << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"; + DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. @@ -842,13 +859,18 @@ Function *ArgPromotion::DoPromotion(Function *F, // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin()); + AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), + NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + // Now that the old function is dead, delete it. - F->eraseFromParent(); - return NF; + delete CG.removeFunctionFromModule(F); + + return NF_CGN; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 1438b48..ec0f1e1 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -1,18 +1,19 @@ add_llvm_library(LLVMipo - FunctionAttrs.cpp ArgumentPromotion.cpp ConstantMerge.cpp DeadArgumentElimination.cpp DeadTypeElimination.cpp ExtractGV.cpp + FunctionAttrs.cpp GlobalDCE.cpp GlobalOpt.cpp + IPConstantPropagation.cpp + IPO.cpp IndMemRemoval.cpp InlineAlways.cpp - Inliner.cpp InlineSimple.cpp + Inliner.cpp Internalize.cpp - IPConstantPropagation.cpp LoopExtractor.cpp LowerSetJmp.cpp MergeFunctions.cpp diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 237e6db..c1a1045 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -78,7 +78,7 @@ bool ConstantMerge::runOnModule(Module &M) { } // Only process constants with initializers. - if (GV->isConstant() && GV->hasInitializer()) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index e480dad..79a32f0 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -24,10 +24,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -72,7 +74,7 @@ namespace { std::string getDescription() const { return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getName(); + + utostr(Idx) + " of function " + F->getNameStr(); } }; @@ -195,8 +197,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. const FunctionType *FTy = Fn.getFunctionType(); + std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), + Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... @@ -277,7 +281,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. static unsigned NumRetVals(const Function *F) { - if (F->getReturnType() == Type::VoidTy) + if (F->getReturnType() == Type::getVoidTy(F->getContext())) return 0; else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) return STy->getNumElements(); @@ -422,7 +426,7 @@ void DAE::SurveyFunction(Function &F) { return; } - DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; @@ -485,7 +489,7 @@ void DAE::SurveyFunction(Function &F) { for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); - DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); // Now, check all of our arguments. unsigned i = 0; @@ -527,7 +531,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// mark any values that are used as this function's parameters or by its return /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { - DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. @@ -548,7 +552,7 @@ void DAE::MarkLive(const RetOrArg &RA) { if (!LiveValues.insert(RA).second) return; // We were already marked Live. - DOUT << "DAE - Marking " << RA.getDescription() << " live\n"; + DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n"); PropagateLiveness(RA); } @@ -596,11 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { const Type *RetTy = FTy->getReturnType(); const Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); + // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); std::vector<const Type*> RetTypes; - if (RetTy == Type::VoidTy) { - NRetTy = Type::VoidTy; + if (RetTy == Type::getVoidTy(F->getContext())) { + NRetTy = Type::getVoidTy(F->getContext()); } else { const StructType *STy = dyn_cast<StructType>(RetTy); if (STy) @@ -612,8 +617,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; - DOUT << "DAE - Removing return value " << i << " from " - << F->getNameStart() << "\n"; + DEBUG(errs() << "DAE - Removing return value " << i << " from " + << F->getName() << "\n"); } } else @@ -622,8 +627,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RetTypes.push_back(RetTy); NewRetIdxs[0] = 0; } else { - DOUT << "DAE - Removing return value from " << F->getNameStart() - << "\n"; + DEBUG(errs() << "DAE - Removing return value from " << F->getName() + << "\n"); ++NumRetValsEliminated; } if (RetTypes.size() > 1) @@ -633,14 +638,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // something and {} into void. // Make the new struct packed if we used to return a packed struct // already. - NRetTy = StructType::get(RetTypes, STy->isPacked()); + NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); else if (RetTypes.size() == 1) // One return type? Just a simple value then, but only if we didn't use to // return a struct with that simple value before. NRetTy = RetTypes.front(); else if (RetTypes.size() == 0) // No return types? Make it void, but only if we didn't use to return {}. - NRetTy = Type::VoidTy; + NRetTy = Type::getVoidTy(F->getContext()); } assert(NRetTy && "No new return type found?"); @@ -649,7 +654,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. - if (NRetTy == Type::VoidTy) + if (NRetTy == Type::getVoidTy(F->getContext())) RAttrs &= ~Attribute::typeIncompatible(NRetTy); else assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 @@ -677,8 +682,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); } else { ++NumArgumentsEliminated; - DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart() - << ") from " << F->getNameStart() << "\n"; + DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName() + << ") from " << F->getName() << "\n"); } } @@ -697,11 +702,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) { ExtraArgHack = true; - Params.push_back(Type::Int32Ty); + Params.push_back(Type::getInt32Ty(F->getContext())); } // Create the new function type based on the recomputed parameters. - FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); + FunctionType *NFTy = FunctionType::get(NRetTy, Params, + FTy->isVarArg()); // No change? if (NFTy == FTy) @@ -750,7 +756,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (ExtraArgHack) - Args.push_back(UndefValue::get(Type::Int32Ty)); + Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { @@ -786,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Return type not changed? Just replace users then. Call->replaceAllUsesWith(New); New->takeName(Call); - } else if (New->getType() == Type::VoidTy) { + } else if (New->getType() == Type::getVoidTy(F->getContext())) { // Our return value has uses, but they will get removed later on. // Replace by null for now. Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); @@ -806,7 +812,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // extract/insertvalue chaining and let instcombine clean that up. // // Start out building up our return value from undef - Value *RetVal = llvm::UndefValue::get(RetTy); + Value *RetVal = UndefValue::get(RetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { Value *V; @@ -862,7 +868,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::VoidTy) { + if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { RetVal = 0; } else { assert (isa<StructType>(RetTy)); @@ -873,7 +879,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // clean that up. Value *OldRet = RI->getOperand(0); // Start out building up our return value from undef - RetVal = llvm::UndefValue::get(NRetTy); + RetVal = UndefValue::get(NRetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, @@ -893,7 +899,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). - ReturnInst::Create(RetVal, RI); + ReturnInst::Create(F->getContext(), RetVal, RI); BB->getInstList().erase(RI); } @@ -910,7 +916,7 @@ bool DAE::runOnModule(Module &M) { // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates // information computed while surveying other functions. - DOUT << "DAE - Deleting dead varargs\n"; + DEBUG(errs() << "DAE - Deleting dead varargs\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { Function &F = *I++; if (F.getFunctionType()->isVarArg()) @@ -921,7 +927,7 @@ bool DAE::runOnModule(Module &M) { // We assume all arguments are dead unless proven otherwise (allowing us to // determine that dead arguments passed into recursive functions are dead). // - DOUT << "DAE - Determining liveness\n"; + DEBUG(errs() << "DAE - Determining liveness\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) SurveyFunction(*I); diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 0c529d2..191100c 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Constants.h" @@ -43,6 +44,7 @@ namespace { return false; // Nothing to extract } + if (deleteStuff) return deleteGV(); M.setModuleInlineAsm(""); @@ -99,7 +101,8 @@ namespace { // by putting them in the used array { std::vector<Constant *> AUGs; - const Type *SBP= PointerType::getUnqual(Type::Int8Ty); + const Type *SBP= + Type::getInt8PtrTy(M.getContext()); for (std::vector<GlobalValue*>::iterator GI = Named.begin(), GE = Named.end(); GI != GE; ++GI) { (*GI)->setLinkage(GlobalValue::ExternalLinkage); @@ -107,9 +110,9 @@ namespace { } ArrayType *AT = ArrayType::get(SBP, AUGs.size()); Constant *Init = ConstantArray::get(AT, AUGs); - GlobalValue *gv = new GlobalVariable(AT, false, + GlobalValue *gv = new GlobalVariable(M, AT, false, GlobalValue::AppendingLinkage, - Init, "llvm.used", &M); + Init, "llvm.used"); gv->setSection("llvm.metadata"); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index e831524..7edaa7f 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" @@ -44,7 +45,7 @@ namespace { FunctionAttrs() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + bool runOnSCC(std::vector<CallGraphNode *> &SCC); // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC); @@ -54,7 +55,7 @@ namespace { // IsFunctionMallocLike - Does this function allocate new memory? bool IsFunctionMallocLike(Function *F, - SmallPtrSet<CallGraphNode*, 8> &) const; + SmallPtrSet<Function*, 8> &) const; // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC); @@ -93,13 +94,12 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) { /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { - SmallPtrSet<CallGraphNode*, 8> SCCNodes; - CallGraph &CG = getAnalysis<CallGraph>(); + SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + SCCNodes.insert(SCC[i]->getFunction()); // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. @@ -133,9 +133,9 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS = CallSite::get(I); - if (CS.getInstruction()) { + if (CS.getInstruction() && CS.getCalledFunction()) { // Ignore calls to functions in the same SCC. - if (SCCNodes.count(CG[CS.getCalledFunction()])) + if (SCCNodes.count(CS.getCalledFunction())) continue; } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore loads from local memory. @@ -154,7 +154,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { return false; if (isa<MallocInst>(I)) - // MallocInst claims not to write memory! PR3754. + // malloc claims not to write memory! PR3754. return false; // If this instruction may read memory, remember that. @@ -226,9 +226,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { /// IsFunctionMallocLike - A function is malloc-like if it returns either null /// or a pointer that doesn't alias any other pointer visible to the caller. bool FunctionAttrs::IsFunctionMallocLike(Function *F, - SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const { - CallGraph &CG = getAnalysis<CallGraph>(); - + SmallPtrSet<Function*, 8> &SCCNodes) const { UniqueVector<Value *> FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) @@ -250,32 +248,36 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) switch (RVI->getOpcode()) { // Extend the analysis by looking upwards. - case Instruction::GetElementPtr: case Instruction::BitCast: + case Instruction::GetElementPtr: FlowsToReturn.insert(RVI->getOperand(0)); continue; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(RVI); FlowsToReturn.insert(SI->getTrueValue()); FlowsToReturn.insert(SI->getFalseValue()); - } continue; + continue; + } case Instruction::PHI: { PHINode *PN = cast<PHINode>(RVI); for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) FlowsToReturn.insert(PN->getIncomingValue(i)); - } continue; + continue; + } // Check whether the pointer came from an allocation. case Instruction::Alloca: case Instruction::Malloc: break; case Instruction::Call: + if (isMalloc(RVI)) + break; case Instruction::Invoke: { CallSite CS(RVI); if (CS.paramHasAttr(0, Attribute::NoAlias)) break; if (CS.getCalledFunction() && - SCCNodes.count(CG[CS.getCalledFunction()])) + SCCNodes.count(CS.getCalledFunction())) break; } // fall-through default: @@ -291,12 +293,12 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, /// AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { - SmallPtrSet<CallGraphNode*, 8> SCCNodes; + SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + SCCNodes.insert(SCC[i]->getFunction()); // Check each function in turn, determining which functions return noalias // pointers. @@ -339,7 +341,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { return MadeChange; } -bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) { bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 9c652b9..09f9e7c 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -58,6 +58,7 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } bool GlobalDCE::runOnModule(Module &M) { bool Changed = false; + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -147,6 +148,9 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + + // Remove dead metadata. + Changed |= M.getContext().RemoveDeadMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7fe097c..a44386e 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,20 +20,23 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -56,7 +59,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); namespace { struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(&ID) {} @@ -244,7 +246,8 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, + LLVMContext &Context) { ConstantInt *CI = dyn_cast<ConstantInt>(Idx); if (!CI) return 0; unsigned IdxV = CI->getZExtValue(); @@ -280,7 +283,8 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, + LLVMContext &Context) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { User *U = *UI++; @@ -301,11 +305,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { Constant *SubInit = 0; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); } else if (CE->getOpcode() == Instruction::BitCast && isa<PointerType>(CE->getType())) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0); + Changed |= CleanupConstantGlobalUsers(CE, 0, Context); } if (CE->use_empty()) { @@ -319,11 +323,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); + dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. - CleanupConstantGlobalUsers(V, Init); + CleanupConstantGlobalUsers(V, Init, Context); return true; } } @@ -423,13 +427,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // Scalar replacing *just* the outer index of the array is probably not // going to be a win anyway, so just give up. for (++GEPI; // Skip array index. - GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI)); + GEPI != E; ++GEPI) { uint64_t NumElements; if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else - NumElements = cast<VectorType>(*GEPI)->getNumElements(); + else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + NumElements = SubVectorTy->getNumElements(); + else { + assert(isa<StructType>(*GEPI) && + "Indexed GEP type is not array, vector, or struct!"); + continue; + } ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) @@ -461,7 +470,8 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, + LLVMContext &Context) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; @@ -483,14 +493,15 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::Int32Ty, i)); + ConstantInt::get(Type::getInt32Ty(Context), i), + Context); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable *NGV = new GlobalVariable(Context, + STy->getElementType(i), false, GlobalVariable::InternalLinkage, - In, GV->getName()+"."+utostr(i), - (Module *)NULL, + In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), - GV->getType()->getAddressSpace()); + GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -517,15 +528,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::Int32Ty, i)); + ConstantInt::get(Type::getInt32Ty(Context), i), + Context); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable *NGV = new GlobalVariable(Context, + STy->getElementType(), false, GlobalVariable::InternalLinkage, - In, GV->getName()+"."+utostr(i), - (Module *)NULL, + In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), - GV->getType()->getAddressSpace()); + GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -541,9 +553,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; + DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); - Constant *NullInt = Constant::getNullValue(Type::Int32Ty); + Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context)); // Loop over all of the uses of the global, replacing the constantexpr geps, // with smaller constantexpr geps or direct references. @@ -577,7 +589,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), - GEPI->getName()+"."+utostr(Val), GEPI); + GEPI->getName()+"."+Twine(Val),GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -667,7 +679,8 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { return true; } -static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, + LLVMContext &Context) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { Instruction *I = cast<Instruction>(*UI++); @@ -700,7 +713,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } else if (CastInst *CI = dyn_cast<CastInst>(I)) { Changed |= OptimizeAwayTrappingUsesOfValue(CI, ConstantExpr::getCast(CI->getOpcode(), - NewV, CI->getType())); + NewV, CI->getType()), Context); if (CI->use_empty()) { Changed = true; CI->eraseFromParent(); @@ -717,8 +730,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, &Idxs[0], + Idxs.size()), Context); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -734,7 +747,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// value stored into it. If there are uses of the loaded value that would trap /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, + LLVMContext &Context) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -745,7 +759,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { - Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context); // If we were able to delete all uses of the loads if (LI->use_empty()) { LI->eraseFromParent(); @@ -768,15 +782,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { } if (Changed) { - DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV; + DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); ++NumGlobUses; } // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DOUT << " *** GLOBAL NOW DEAD!\n"; - CleanupConstantGlobalUsers(GV, 0); + DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); + CleanupConstantGlobalUsers(GV, 0, Context); if (GV->use_empty()) { GV->eraseFromParent(); ++NumDeleted; @@ -788,10 +802,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V) { +static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I)) { + if (Constant *NewC = ConstantFoldInstruction(I, Context)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -808,8 +822,9 @@ static void ConstantPropUsersOf(Value *V) { /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - MallocInst *MI) { - DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI; + MallocInst *MI, + LLVMContext &Context) { + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI); ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); if (NElements->getZExtValue() != 1) { @@ -818,10 +833,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, Type *NewTy = ArrayType::get(MI->getAllocatedType(), NElements->getZExtValue()); MallocInst *NewMI = - new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty), + new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), MI->getAlignment(), MI->getName(), MI); Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty); + Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, NewMI->getName()+".el0", MI); MI->replaceAllUsesWith(NewGEP); @@ -831,17 +846,17 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. Constant *Init = UndefValue::get(MI->getAllocatedType()); - GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + MI->getAllocatedType(), false, GlobalValue::InternalLinkage, Init, GV->getName()+".body", - (Module *)NULL, + GV, GV->isThreadLocal()); - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. - GV->getParent()->getGlobalList().insert(GV, NewGV); - + // Anything that used the malloc now uses the global directly. MI->replaceAllUsesWith(NewGV); @@ -853,9 +868,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. GlobalVariable *InitBool = - new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, - ConstantInt::getFalse(), GV->getName()+".init", - (Module *)NULL, GV->isThreadLocal()); + new GlobalVariable(Context, Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(Context), GV->getName()+".init", + GV->isThreadLocal()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. @@ -872,10 +888,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); InitBoolUsed = true; switch (CI->getPredicate()) { - default: assert(0 && "Unknown ICmp Predicate!"); + default: llvm_unreachable("Unknown ICmp Predicate!"); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(); // X < null -> always false + LV = ConstantInt::getFalse(Context); // X < null -> always false break; case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_SLE: @@ -897,7 +913,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, } else { StoreInst *SI = cast<StoreInst>(GV->use_back()); // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(), InitBool, SI); + new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); SI->eraseFromParent(); } @@ -917,9 +933,141 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV); + ConstantPropUsersOf(NewGV, Context); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue); + ConstantPropUsersOf(RepValue, Context); + + return NewGV; +} + +/// OptimizeGlobalAddressOfMalloc - This function takes the specified global +/// variable, and transforms the program as if it always contained the result of +/// the specified malloc. Because it is always the result of the specified +/// malloc, there is no reason to actually DO the malloc. Instead, turn the +/// malloc into a global, and any loads of GV as uses of the new global. +static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, + CallInst *CI, + BitCastInst *BCI, + LLVMContext &Context, + TargetData* TD) { + const Type *IntPtrTy = TD->getIntPtrType(Context); + + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI); + + ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, + Context, TD)); + if (NElements->getZExtValue() != 1) { + // If we have an array allocation, transform it to a single element + // allocation to make the code below simpler. + Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), + NElements->getZExtValue()); + Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); + Instruction* NewMI = cast<Instruction>(NewM); + Value* Indices[2]; + Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); + Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, + NewMI->getName()+".el0", CI); + BCI->replaceAllUsesWith(NewGEP); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast<BitCastInst>(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } + + // Create the new global variable. The contents of the malloc'd memory is + // undefined, so initialize with an undef value. + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. + const Type *MAT = getMallocAllocatedType(CI); + Constant *Init = UndefValue::get(MAT); + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + MAT, false, + GlobalValue::InternalLinkage, Init, + GV->getName()+".body", + GV, + GV->isThreadLocal()); + + // Anything that used the malloc now uses the global directly. + BCI->replaceAllUsesWith(NewGV); + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Context, Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(Context), GV->getName()+".init", + GV->isThreadLocal()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + std::vector<StoreInst*> Stores; + while (!GV->use_empty()) + if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa<ICmpInst>(LoadUse.getUser())) + LoadUse = RepValue; + else { + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + LV = ConstantInt::getFalse(Context); // X < null -> always false + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + } + LI->eraseFromParent(); + } else { + StoreInst *SI = cast<StoreInst>(GV->use_back()); + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); + SI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast<Instruction>(InitBool->use_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV, InitBool); + + + // Now the GV is dead, nuke it and the malloc. + GV->eraseFromParent(); + BCI->eraseFromParent(); + CI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV, Context); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue, Context); return NewGV; } @@ -1071,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from /// GV are simple enough to perform HeapSRA, return true. static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, - MallocInst *MI) { + Instruction *StoredVal) { SmallPtrSet<PHINode*, 32> LoadUsingPHIs; SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad; for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; @@ -1095,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, Value *InVal = PN->getIncomingValue(op); // PHI of the stored value itself is ok. - if (InVal == MI) continue; + if (InVal == StoredVal) continue; if (PHINode *InPN = dyn_cast<PHINode>(InVal)) { // One of the PHIs in our set is (optimistically) ok. @@ -1121,7 +1269,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, + LLVMContext &Context) { std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; if (FieldNo >= FieldVals.size()) @@ -1139,19 +1288,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, // a new Load of the scalarized global. Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, InsertedScalarizedValues, - PHIsToRewrite), - LI->getName()+".f" + utostr(FieldNo), LI); + PHIsToRewrite, Context), + LI->getName()+".f"+Twine(FieldNo), LI); } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. const StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), - PN->getName()+".f"+utostr(FieldNo), PN); + Result = + PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getName()+".f"+Twine(FieldNo), PN); PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { - assert(0 && "Unknown usable value"); + llvm_unreachable("Unknown usable value"); Result = 0; } @@ -1162,18 +1312,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, /// the load, rewrite the derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, + LLVMContext &Context) { // If this is a comparison against null, handle it. if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { assert(isa<ConstantPointerNull>(SCI->getOperand(1))); // If we have a setcc of the loaded pointer, we can use a setcc of any // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, - InsertedScalarizedValues, PHIsToRewrite); + InsertedScalarizedValues, PHIsToRewrite, + Context); - Value *New = new ICmpInst(SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), - SCI->getName(), SCI); + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; @@ -1187,7 +1339,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // Load the pointer for this field. unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite); + InsertedScalarizedValues, PHIsToRewrite, + Context); // Create the new GEP idx vector. SmallVector<Value*, 8> GEPIdx; @@ -1219,7 +1372,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast<Instruction>(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, + Context); } } @@ -1229,11 +1383,13 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, + LLVMContext &Context) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); UI != E; ) { Instruction *User = cast<Instruction>(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, + Context); } if (Load->use_empty()) { @@ -1244,8 +1400,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ - DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI; +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, + LLVMContext &Context){ + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI); const StructType *STy = cast<StructType>(MI->getAllocatedType()); // There is guaranteed to be at least one use of the malloc (storing @@ -1264,14 +1421,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ const Type *PFieldTy = PointerType::getUnqual(FieldTy); GlobalVariable *NGV = - new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, + new GlobalVariable(*GV->getParent(), + PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + utostr(FieldNo), GV, + GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), - MI->getName() + ".f" + utostr(FieldNo),MI); + MI->getName() + ".f" + Twine(FieldNo), MI); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, MI); } @@ -1290,9 +1448,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // } Value *RunningOr = 0; for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull", MI); + Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); if (!RunningOr) RunningOr = Cond; // First seteq else @@ -1305,7 +1463,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null", + BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", OrigBB->getParent()); // Remove the uncond branch from OrigBB to ContBB, turning it into a cond @@ -1317,11 +1475,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // pointer, because some may be null while others are not. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), - "tmp", NullPtrBlock); - BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent()); + "tmp"); + BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + OrigBB->getParent()); BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); // Fill in FreeBlock. @@ -1353,7 +1513,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ Instruction *User = cast<Instruction>(*UI++); if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, + Context); continue; } @@ -1384,7 +1545,192 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite); + PHIsToRewrite, Context); + FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); + } + } + + // Drop all inter-phi links and any loads that made it this far. + for (DenseMap<Value*, std::vector<Value*> >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I->first)) + PN->dropAllReferences(); + else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) + LI->dropAllReferences(); + } + + // Delete all the phis and loads now that inter-references are dead. + for (DenseMap<Value*, std::vector<Value*> >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I->first)) + PN->eraseFromParent(); + else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) + LI->eraseFromParent(); + } + + // The old global is now dead, remove it. + GV->eraseFromParent(); + + ++NumHeapSRA; + return cast<GlobalVariable>(FieldGlobals[0]); +} + +/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, + CallInst *CI, BitCastInst* BCI, + LLVMContext &Context, + TargetData *TD){ + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI + << " BITCAST = " << *BCI << '\n'); + const Type* MAT = getMallocAllocatedType(CI); + const StructType *STy = cast<StructType>(MAT); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. + ReplaceUsesOfMallocWithGlobal(BCI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as CI, and N globals. + std::vector<Value*> FieldGlobals; + std::vector<Value*> FieldMallocs; + + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + const Type *FieldTy = STy->getElementType(FieldNo); + const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + + GlobalVariable *NGV = + new GlobalVariable(*GV->getParent(), + PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), + GV->getName() + ".f" + Twine(FieldNo), GV, + GV->isThreadLocal()); + FieldGlobals.push_back(NGV); + + Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, + getMallocArraySize(CI, Context, TD), + BCI->getName() + ".f" + Twine(FieldNo)); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, BCI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + Value *RunningOr = 0; + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + if (!RunningOr) + RunningOr = Cond; // First seteq + else + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = BCI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); + + // Create the block to check the first condition. Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType()), + "tmp"); + BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + OrigBB->getParent()); + BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); + + // Fill in FreeBlock. + new FreeInst(GVVal, FreeBlock); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + BranchInst::Create(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + BranchInst::Create(ContBB, NullPtrBlock); + + // CI and BCI are no longer needed, remove them. + BCI->eraseFromParent(); + CI->eraseFromParent(); + + /// InsertedScalarizedLoads - As we process loads, if we can't immediately + /// update all uses of the load, keep track of what scalarized loads are + /// inserted for a given load. + DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; + InsertedScalarizedValues[GV] = FieldGlobals; + + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, + Context); + continue; + } + + // Must be a store of null. + StoreInst *SI = cast<StoreInst>(User); + assert(isa<ConstantPointerNull>(SI->getOperand(0)) && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + Constant *Null = Constant::getNullValue(PT->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + + // While we have PHIs that are interesting to rewrite, do it. + while (!PHIsToRewrite.empty()) { + PHINode *PN = PHIsToRewrite.back().first; + unsigned FieldNo = PHIsToRewrite.back().second; + PHIsToRewrite.pop_back(); + PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); + assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); + + // Add all the incoming values. This can materialize more phis. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, + PHIsToRewrite, Context); FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } @@ -1422,7 +1768,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, MallocInst *MI, Module::global_iterator &GVI, - TargetData &TD) { + TargetData *TD, + LLVMContext &Context) { // If this is a malloc of an abstract type, don't touch it. if (!MI->getAllocatedType()->isSized()) return false; @@ -1456,9 +1803,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue()* - TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, MI); + if (TD && + NElements->getZExtValue()* + TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); return true; } } @@ -1485,7 +1833,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { MallocInst *NewMI = new MallocInst(AllocSTy, - ConstantInt::get(Type::Int32Ty, AT->getNumElements()), + ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()), "", MI); NewMI->takeName(MI); Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); @@ -1494,7 +1843,100 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, MI = NewMI; } - GVI = PerformHeapAllocSRoA(GV, MI); + GVI = PerformHeapAllocSRoA(GV, MI, Context); + return true; + } + } + + return false; +} + +/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a +/// pointer global variable with a single value stored it that is a malloc or +/// cast of malloc. +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, + CallInst *CI, + BitCastInst *BCI, + Module::global_iterator &GVI, + TargetData *TD, + LLVMContext &Context) { + // If we can't figure out the type being malloced, then we can't optimize. + const Type *AllocTy = getMallocAllocatedType(CI); + assert(AllocTy); + + // If this is a malloc of an abstract type, don't touch it. + if (!AllocTy->isSized()) + return false; + + // We can't optimize this global unless all uses of it are *known* to be + // of the malloc value, not of the null initializer value (consider a use + // that compares the global's value against zero to see if the malloc has + // been reached). To do this, we check to see if all uses of the global + // would trap if the global were null: this proves that they must all + // happen after the malloc. + if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) + return false; + + // We can't optimize this if the malloc itself is used in a complex way, + // for example, being stored into multiple globals. This allows the + // malloc to be stored into the specified global, loaded setcc'd, and + // GEP'd. These are all things we could transform to using the global + // for. + { + SmallPtrSet<PHINode*, 8> PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + } + + // If we have a global that is only initialized with a fixed size malloc, + // transform the program to use global memory instead of malloc'd memory. + // This eliminates dynamic allocation, avoids an indirection accessing the + // data, and exposes the resultant global to further GlobalOpt. + if (ConstantInt *NElements = + dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (TD && + NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + return true; + } + } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (!isArrayMalloc(CI, Context, TD)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); + + if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()); + Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + AllocSTy, NumElements, + BCI->getName()); + Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); + BCI->replaceAllUsesWith(Cast); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast<BitCastInst>(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } + + GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); return true; } } @@ -1506,7 +1948,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // that only one value (besides its initializer) is ever stored to the global. static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, Module::global_iterator &GVI, - TargetData &TD) { + TargetData *TD, LLVMContext &Context) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1518,14 +1960,25 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + SOVC = + ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) return true; } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { - if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD)) + if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) return true; + } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { + if (getMallocAllocatedType(CI)) { + BitCastInst* BCI = NULL; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); + if (BCI && + TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) + return true; + } } } @@ -1536,7 +1989,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// two values ever stored into GV are its initializer and OtherVal. See if we /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. -static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, + LLVMContext &Context) { const Type *GVElType = GV->getType()->getElementType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is @@ -1544,7 +1998,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // between them is very expensive and unlikely to lead to later // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. - if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() || + if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() || isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) return false; @@ -1554,18 +2008,19 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) return false; - DOUT << " *** SHRINKING TO BOOL: " << *GV; + DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. - GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false, - GlobalValue::InternalLinkage, ConstantInt::getFalse(), + GlobalVariable *NewGV = new GlobalVariable(Context, + Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, ConstantInt::getFalse(Context), GV->getName()+".b", - (Module *)NULL, GV->isThreadLocal()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); - assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!"); + assert(InitVal->getType() != Type::getInt1Ty(Context) && + "No reason to shrink to bool!"); // If initialized to zero and storing one into the global, we can use a cast // instead of a select to synthesize the desired value. @@ -1581,7 +2036,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // Only do this if we weren't storing a loaded value. Value *StoreVal; if (StoringOther || SI->getOperand(0) == InitVal) - StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther); + StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther); else { // Otherwise, we are storing a previously loaded copy. To do this, // change the copy from copying the original value to just copying the @@ -1632,7 +2087,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->removeDeadConstantUsers(); if (GV->use_empty()) { - DOUT << "GLOBAL DEAD: " << *GV; + DEBUG(errs() << "GLOBAL DEAD: " << *GV); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1675,7 +2130,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->getName() == "main" && GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { - DOUT << "LOCALIZING GLOBAL: " << *GV; + DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment @@ -1692,11 +2147,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.isLoaded) { - DOUT << "GLOBAL NEVER LOADED: " << *GV; + DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), + GV->getContext()); // If the global is dead now, delete it. if (GV->use_empty()) { @@ -1707,16 +2163,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DOUT << "MARKING CONSTANT: " << *GV; + DEBUG(errs() << "MARKING CONSTANT: " << *GV); GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext()); // If the global is dead now, just nuke it. if (GV->use_empty()) { - DOUT << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"; + DEBUG(errs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } @@ -1724,11 +2180,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, - getAnalysis<TargetData>())) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } + if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, + GV->getContext())) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } } else if (GS.StoredType == GlobalStatus::isStoredOnce) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the @@ -1740,11 +2197,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), + GV->getContext()); if (GV->use_empty()) { - DOUT << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"; + DEBUG(errs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } else { @@ -1757,13 +2215,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysis<TargetData>())) + getAnalysisIfAvailable<TargetData>(), + GV->getContext())) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) { ++NumShrunkToBool; return true; } @@ -1866,16 +2325,16 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - STy->getElementType(0) != Type::Int32Ty) return 0; + STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || - FTy->getNumParams() != 0) + if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || + FTy->isVarArg() || FTy->getNumParams() != 0) return 0; // Verify that the initializer is simple enough for us to handle. - if (!I->hasInitializer()) return 0; + if (!I->hasDefinitiveInitializer()) return 0; ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); if (!CA) return 0; for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) @@ -1916,10 +2375,11 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector<Function*> &Ctors) { + const std::vector<Function*> &Ctors, + LLVMContext &Context) { // If we made a change, reassemble the initializer list. std::vector<Constant*> CSVals; - CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); + CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535)); CSVals.push_back(0); // Create the new init list. @@ -1928,19 +2388,19 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::VoidTy, false); + const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false); const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); + CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647); } - CAList.push_back(ConstantStruct::get(CSVals)); + CAList.push_back(ConstantStruct::get(Context, CSVals, false)); } // Create the array initializer. const Type *StructTy = - cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), - CAList); + cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + CAList.size()), CAList); // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { @@ -1949,9 +2409,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), + GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), + GCL->isConstant(), GCL->getLinkage(), CA, "", - (Module *)NULL, GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); @@ -1984,21 +2444,38 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, /// enough for us to understand. In particular, if it is a cast of something, /// we punt. We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) - return false; // do not allow weak/linkonce/dllimport/dllexport linkage. - return !GV->isDeclaration(); // reject external globals. - } +static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + return GV->hasDefinitiveInitializer(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0))) { + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) - return false; // do not allow weak/linkonce/dllimport/dllexport linkage. - return GV->hasInitializer() && - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasDefinitiveInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } return false; } @@ -2007,7 +2484,8 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { /// initializer. This returns 'Init' modified to reflect 'Val' stored into it. /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo) { + ConstantExpr *Addr, unsigned OpNo, + LLVMContext &Context) { // Base case of the recursion. if (OpNo == Addr->getNumOperands()) { assert(Val->getType() == Init->getType() && "Type mismatch!"); @@ -2028,7 +2506,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Elts.push_back(UndefValue::get(STy->getElementType(i))); } else { - assert(0 && "This code is out of sync with " + llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } @@ -2036,10 +2514,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context); // Return the modified struct. - return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); + return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked()); } else { ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); const ArrayType *ATy = cast<ArrayType>(Init->getType()); @@ -2056,20 +2534,21 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, Constant *Elt = UndefValue::get(ATy->getElementType()); Elts.assign(ATy->getNumElements(), Elt); } else { - assert(0 && "This code is out of sync with " + llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } assert(CI->getZExtValue() < ATy->getNumElements()); Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context); return ConstantArray::get(ATy, Elts); } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr) { +static void CommitValueTo(Constant *Val, Constant *Addr, + LLVMContext &Context) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { assert(GV->hasInitializer()); GV->setInitializer(Val); @@ -2080,7 +2559,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2); + Init = EvaluateStoreInto(Init, Val, CE, 2, Context); GV->setInitializer(Init); } @@ -2088,7 +2567,8 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { /// P after the stores reflected by 'memory' have been performed. If we can't /// decide, return null. static Constant *ComputeLoadResult(Constant *P, - const DenseMap<Constant*, Constant*> &Memory) { + const DenseMap<Constant*, Constant*> &Memory, + LLVMContext &Context) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); @@ -2096,7 +2576,7 @@ static Constant *ComputeLoadResult(Constant *P, // Access it. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { - if (GV->hasInitializer()) + if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } @@ -2106,7 +2586,7 @@ static Constant *ComputeLoadResult(Constant *P, if (CE->getOpcode() == Instruction::GetElementPtr && isa<GlobalVariable>(CE->getOperand(0))) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - if (GV->hasInitializer()) + if (GV->hasDefinitiveInitializer()) return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } @@ -2117,7 +2597,7 @@ static Constant *ComputeLoadResult(Constant *P, /// successful, false if we can't evaluate it. ActualArgs contains the formal /// arguments for the function. static bool EvaluateFunction(Function *F, Constant *&RetVal, - const std::vector<Constant*> &ActualArgs, + const SmallVectorImpl<Constant*> &ActualArgs, std::vector<Function*> &CallStack, DenseMap<Constant*, Constant*> &MutatedMemory, std::vector<GlobalVariable*> &AllocaTmps) { @@ -2126,6 +2606,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; + LLVMContext &Context = F->getContext(); + CallStack.push_back(F); /// Values - As we compute SSA register values, we store their contents here. @@ -2152,7 +2634,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); - if (!isSimpleEnoughPointerToCommit(Ptr)) + if (!isSimpleEnoughPointerToCommit(Ptr, Context)) // If this is too complex for us to commit, reject it. return false; Constant *Val = getVal(Values, SI->getOperand(0)); @@ -2170,7 +2652,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { - InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = + ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { @@ -2179,16 +2662,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(Values, *i)); - InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + InstResult = cast<GEPOperator>(GEP)->isInBounds() ? + ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : + ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { if (LI->isVolatile()) return false; // no volatile accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), - MutatedMemory); + MutatedMemory, Context); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. const Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Ty, false, + AllocaTmps.push_back(new GlobalVariable(Context, Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); @@ -2208,14 +2693,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); if (!Callee) return false; // Cannot resolve. - std::vector<Constant*> Formals; + SmallVector<Constant*, 8> Formals; for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); - + if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, &Formals[0], + if (Constant *C = ConstantFoldCall(Callee, Formals.data(), Formals.size())) { InstResult = C; } else { @@ -2310,16 +2795,17 @@ static bool EvaluateStaticConstructor(Function *F) { // Call the function. Constant *RetValDummy; - bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(), - CallStack, MutatedMemory, AllocaTmps); + bool EvalSuccess = EvaluateFunction(F, RetValDummy, + SmallVector<Constant*, 0>(), CallStack, + MutatedMemory, AllocaTmps); if (EvalSuccess) { // We succeeded at evaluation: commit the result. - DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" - << F->getName() << "' to " << MutatedMemory.size() - << " stores.\n"; + DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << MutatedMemory.size() + << " stores.\n"); for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), E = MutatedMemory.end(); I != E; ++I) - CommitValueTo(I->second, I->first); + CommitValueTo(I->second, I->first, F->getContext()); } // At this point, we are done interpreting. If we created any 'alloca' @@ -2376,7 +2862,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (!MadeChange) return false; - GCL = InstallGlobalCtors(GCL, Ctors); + GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext()); return true; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index e4a9dea..7b0e9c7 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -129,7 +130,8 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { Function::arg_iterator AI = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? - if (ArgumentConstants[i].second || AI->use_empty()) + if (ArgumentConstants[i].second || AI->use_empty() || + (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; Value *V = ArgumentConstants[i].first; @@ -151,13 +153,15 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { // callers will be updated to use the value they pass in directly instead of // using the return value. bool IPCP::PropagateConstantReturn(Function &F) { - if (F.getReturnType() == Type::VoidTy) + if (F.getReturnType() == Type::getVoidTy(F.getContext())) return false; // No return value. // If this function could be overridden later in the link stage, we can't // propagate information about its results into callers. if (F.mayBeOverridden()) return false; + + LLVMContext &Context = F.getContext(); // Check to see if this function returns a constant. SmallVector<Value *,4> RetVals; @@ -182,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { if (!STy) V = RI->getOperand(i); else - V = FindInsertedValue(RI->getOperand(0), i); + V = FindInsertedValue(RI->getOperand(0), i, Context); if (V) { // Ignore undefs, we can change them into anything diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp index b55dea2..e7884ec 100644 --- a/lib/Transforms/IPO/IndMemRemoval.cpp +++ b/lib/Transforms/IPO/IndMemRemoval.cpp @@ -1,4 +1,4 @@ -//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===// +//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ---------===// // // The LLVM Compiler Infrastructure // @@ -10,8 +10,8 @@ // This pass finds places where memory allocation functions may escape into // indirect land. Some transforms are much easier (aka possible) only if free // or malloc are not called indirectly. -// Thus find places where the address of memory functions are taken and construct -// bounce functions with direct calls of those functions. +// Thus find places where the address of memory functions are taken and +// construct bounce functions with direct calls of those functions. // //===----------------------------------------------------------------------===// @@ -55,8 +55,8 @@ bool IndMemRemPass::runOnModule(Module &M) { Function* FN = Function::Create(F->getFunctionType(), GlobalValue::LinkOnceAnyLinkage, "free_llvm_bounce", &M); - BasicBlock* bb = BasicBlock::Create("entry",FN); - Instruction* R = ReturnInst::Create(bb); + BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN); + Instruction* R = ReturnInst::Create(M.getContext(), bb); new FreeInst(FN->arg_begin(), R); ++NumBounce; NumBounceSites += F->getNumUses(); @@ -70,11 +70,12 @@ bool IndMemRemPass::runOnModule(Module &M) { GlobalValue::LinkOnceAnyLinkage, "malloc_llvm_bounce", &M); FN->setDoesNotAlias(0); - BasicBlock* bb = BasicBlock::Create("entry",FN); + BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN); Instruction* c = CastInst::CreateIntegerCast( - FN->arg_begin(), Type::Int32Ty, false, "c", bb); - Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb); - ReturnInst::Create(a, bb); + FN->arg_begin(), Type::getInt32Ty(M.getContext()), false, "c", bb); + Instruction* a = new MallocInst(Type::getInt8Ty(M.getContext()), + c, "m", bb); + ReturnInst::Create(M.getContext(), a, bb); ++NumBounce; NumBounceSites += F->getNumUses(); F->replaceAllUsesWith(FN); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 5f9ea54..2344403 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -19,11 +19,11 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index e107a00..b1c643b 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -18,11 +18,11 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -78,7 +78,7 @@ bool SimpleInliner::doInitialization(CallGraph &CG) { return false; // Don't crash on invalid code - if (!GV->hasInitializer()) + if (!GV->hasDefinitiveInitializer()) return false; const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b382837..ea47366 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -18,21 +18,25 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +STATISTIC(NumMergedAllocas, "Number of allocas merged together"); static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(200), +InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 200)")); Inliner::Inliner(void *ID) @@ -45,19 +49,32 @@ Inliner::Inliner(void *ID, int Threshold) /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { - Info.addRequired<TargetData>(); CallGraphSCCPass::getAnalysisUsage(Info); } -// InlineCallIfPossible - If it is possible to inline the specified call site, -// do so and update the CallGraph for this operation. -bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, - const SmallPtrSet<Function*, 8> &SCCFunctions, - const TargetData &TD) { + +typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > +InlinedArrayAllocasTy; + +/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// do so and update the CallGraph for this operation. +/// +/// This function also does some basic book-keeping to update the IR. The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, + const TargetData *TD, + InlinedArrayAllocasTy &InlinedArrayAllocas) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); - if (!InlineFunction(CS, &CG, &TD)) return false; + // Try to inline the function. Get the list of static allocas that were + // inlined. + SmallVector<AllocaInst*, 16> StaticAllocas; + if (!InlineFunction(CS, &CG, TD, &StaticAllocas)) + return false; // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. @@ -67,23 +84,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); - // If we inlined the last possible call site to the function, delete the - // function body now. - if (Callee->use_empty() && (Callee->hasLocalLinkage() || - Callee->hasAvailableExternallyLinkage()) && - !SCCFunctions.count(Callee)) { - DOUT << " -> Deleting dead function: " << Callee->getName() << "\n"; - CallGraphNode *CalleeNode = CG[Callee]; - - // Remove any call graph edges from the callee to its callees. - CalleeNode->removeAllCalledFunctions(); - - resetCachedCostInfo(CalleeNode->getFunction()); + + // Look at all of the allocas that we inlined through this call site. If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet<AllocaInst*, 16> UsedAllocas; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. + for (unsigned AllocaNo = 0, e = StaticAllocas.size(); + AllocaNo != e; ++AllocaNo) { + AllocaInst *AI = StaticAllocas[AllocaNo]; + + // Don't bother trying to merge array allocations (they will usually be + // canonicalized to be an allocation *of* an array), or allocations whose + // type is not itself an array (because we're afraid of pessimizing SRoA). + const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); + if (ATy == 0 || AI->isArrayAllocation()) + continue; + + // Get the list of all available allocas for this array type. + std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy]; + + // Loop over the allocas in AllocasForType to see if we can reuse one. Note + // that we have to be careful not to reuse the same "available" alloca for + // multiple different allocas that we just inlined, we use the 'UsedAllocas' + // set to keep track of which "available" allocas are being used by this + // function. Also, AllocasForType can be empty of course! + bool MergedAwayAlloca = false; + for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { + AllocaInst *AvailableAlloca = AllocasForType[i]; + + // The available alloca has to be in the right function, not in some other + // function in this SCC. + if (AvailableAlloca->getParent() != AI->getParent()) + continue; + + // If the inlined function already uses this alloca then we can't reuse + // it. + if (!UsedAllocas.insert(AvailableAlloca)) + continue; + + // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare + // success! + DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); + + AI->replaceAllUsesWith(AvailableAlloca); + AI->eraseFromParent(); + MergedAwayAlloca = true; + ++NumMergedAllocas; + break; + } - // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); - ++NumDeleted; + // If we already nuked the alloca, we're done with it. + if (MergedAwayAlloca) + continue; + + // If we were unable to merge away the alloca either because there are no + // allocas of the right type available or because we reused them all + // already, remember that this alloca came from an inlined function and mark + // it used so we don't reuse it for other allocas from this inline + // operation. + AllocasForType.push_back(AI); + UsedAllocas.insert(AI); } + return true; } @@ -91,69 +174,145 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, /// at the given CallSite. bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); - float FudgeFactor = getInlineFudgeFactor(CS); if (IC.isAlways()) { - DOUT << " Inlining: cost=always" - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost=always" + << ", Call: " << *CS.getInstruction() << "\n"); return true; } if (IC.isNever()) { - DOUT << " NOT Inlining: cost=never" - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=never" + << ", Call: " << *CS.getInstruction() << "\n"); return false; } int Cost = IC.getValue(); int CurrentThreshold = InlineThreshold; - Function *Fn = CS.getCaller(); - if (Fn && !Fn->isDeclaration() - && Fn->hasFnAttr(Attribute::OptimizeForSize) - && InlineThreshold != 50) { + Function *Caller = CS.getCaller(); + if (Caller && !Caller->isDeclaration() && + Caller->hasFnAttr(Attribute::OptimizeForSize) && + InlineLimit.getNumOccurrences() == 0 && + InlineThreshold != 50) CurrentThreshold = 50; - } + float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { - DOUT << " NOT Inlining: cost=" << Cost - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=" << Cost + << ", Call: " << *CS.getInstruction() << "\n"); return false; - } else { - DOUT << " Inlining: cost=" << Cost - << ", Call: " << *CS.getInstruction(); - return true; } + + // Try to detect the case where the current inlining candidate caller + // (call it B) is a static function and is an inlining candidate elsewhere, + // and the current candidate callee (call it C) is large enough that + // inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B + // into its callers. + if (Caller->hasLocalLinkage()) { + int TotalSecondaryCost = 0; + bool outerCallsFound = false; + bool allOuterCallsWillBeInlined = true; + bool someOuterCallWouldNotBeInlined = false; + for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); + I != E; ++I) { + CallSite CS2 = CallSite::get(*I); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. + if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller) + continue; + + InlineCost IC2 = getInlineCost(CS2); + if (IC2.isNever()) + allOuterCallsWillBeInlined = false; + if (IC2.isAlways() || IC2.isNever()) + continue; + + outerCallsFound = true; + int Cost2 = IC2.getValue(); + int CurrentThreshold2 = InlineThreshold; + Function *Caller2 = CS2.getCaller(); + if (Caller2 && !Caller2->isDeclaration() && + Caller2->hasFnAttr(Attribute::OptimizeForSize) && + InlineThreshold != 50) + CurrentThreshold2 = 50; + + float FudgeFactor2 = getInlineFudgeFactor(CS2); + + if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) + allOuterCallsWillBeInlined = false; + + // See if we have this case. We subtract off the penalty + // for the call instruction, which we would be deleting. + if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && + Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= + (int)(CurrentThreshold2 * FudgeFactor2)) { + someOuterCallWouldNotBeInlined = true; + TotalSecondaryCost += Cost2; + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. + if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (outerCallsFound && someOuterCallWouldNotBeInlined && + TotalSecondaryCost < Cost) { + DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << Cost << + ", outer Cost = " << TotalSecondaryCost << '\n'); + return false; + } + } + + DEBUG(errs() << " Inlining: cost=" << Cost + << ", Call: " << *CS.getInstruction() << '\n'); + return true; } -bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { +bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); - TargetData &TD = getAnalysis<TargetData>(); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); SmallPtrSet<Function*, 8> SCCFunctions; - DOUT << "Inliner visiting SCC:"; + DEBUG(errs() << "Inliner visiting SCC:"); for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); if (F) SCCFunctions.insert(F); - DOUT << " " << (F ? F->getName() : "INDIRECTNODE"); + DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. - std::vector<CallSite> CallSites; + SmallVector<CallSite, 16> CallSites; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (Function *F = SCC[i]->getFunction()) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) && - (!CS.getCalledFunction() || - !CS.getCalledFunction()->isDeclaration())) - CallSites.push_back(CS); - } + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) continue; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite::get(I); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) + continue; + + CallSites.push_back(CS); + } + } - DOUT << ": " << CallSites.size() << " call sites.\n"; + DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. @@ -163,6 +322,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); + + InlinedArrayAllocasTy InlinedArrayAllocas; + // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. bool Changed = false; @@ -171,51 +333,68 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { LocalChange = false; // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. - for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) - if (Function *Callee = CallSites[CSi].getCalledFunction()) { - // Calls to external functions are never inlinable. - if (Callee->isDeclaration()) { - if (SCC.size() == 1) { - std::swap(CallSites[CSi], CallSites.back()); - CallSites.pop_back(); - } else { - // Keep the 'in SCC / not in SCC' boundary correct. - CallSites.erase(CallSites.begin()+CSi); - } - --CSi; - continue; - } - - // If the policy determines that we should inline this function, - // try to do so. - CallSite CS = CallSites[CSi]; - if (shouldInline(CS)) { - Function *Caller = CS.getCaller(); - // Attempt to inline the function... - if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) { - // Remove any cached cost info for this caller, as inlining the - // callee has increased the size of the caller (which may be the - // same as the callee). - resetCachedCostInfo(Caller); - - // Remove this call site from the list. If possible, use - // swap/pop_back for efficiency, but do not use it if doing so would - // move a call site to a function in this SCC before the - // 'FirstCallInSCC' barrier. - if (SCC.size() == 1) { - std::swap(CallSites[CSi], CallSites.back()); - CallSites.pop_back(); - } else { - CallSites.erase(CallSites.begin()+CSi); - } - --CSi; - - ++NumInlined; - Changed = true; - LocalChange = true; - } - } + for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { + CallSite CS = CallSites[CSi]; + + Function *Callee = CS.getCalledFunction(); + // We can only inline direct calls to non-declarations. + if (Callee == 0 || Callee->isDeclaration()) continue; + + // If the policy determines that we should inline this function, + // try to do so. + if (!shouldInline(CS)) + continue; + + Function *Caller = CS.getCaller(); + // Attempt to inline the function... + if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) + continue; + + // If we inlined the last possible call site to the function, delete the + // function body now. + if (Callee->use_empty() && Callee->hasLocalLinkage() && + // TODO: Can remove if in SCC now. + !SCCFunctions.count(Callee) && + + // The function may be apparently dead, but if there are indirect + // callgraph references to the node, we cannot delete it yet, this + // could invalidate the CGSCC iterator. + CG[Callee]->getNumReferences() == 0) { + DEBUG(errs() << " -> Deleting dead function: " + << Callee->getName() << "\n"); + CallGraphNode *CalleeNode = CG[Callee]; + + // Remove any call graph edges from the callee to its callees. + CalleeNode->removeAllCalledFunctions(); + + resetCachedCostInfo(Callee); + + // Removing the node for callee from the call graph and delete it. + delete CG.removeFunctionFromModule(CalleeNode); + ++NumDeleted; } + + // Remove any cached cost info for this caller, as inlining the + // callee has increased the size of the caller (which may be the + // same as the callee). + resetCachedCostInfo(Caller); + + // Remove this call site from the list. If possible, use + // swap/pop_back for efficiency, but do not use it if doing so would + // move a call site to a function in this SCC before the + // 'FirstCallInSCC' barrier. + if (SCC.size() == 1) { + std::swap(CallSites[CSi], CallSites.back()); + CallSites.pop_back(); + } else { + CallSites.erase(CallSites.begin()+CSi); + } + --CSi; + + ++NumInlined; + Changed = true; + LocalChange = true; + } } while (LocalChange); return Changed; @@ -227,47 +406,55 @@ bool Inliner::doFinalization(CallGraph &CG) { return removeDeadFunctions(CG); } - /// removeDeadFunctions - Remove dead functions that are not included in - /// DNR (Do Not Remove) list. +/// removeDeadFunctions - Remove dead functions that are not included in +/// DNR (Do Not Remove) list. bool Inliner::removeDeadFunctions(CallGraph &CG, - SmallPtrSet<const Function *, 16> *DNR) { - std::set<CallGraphNode*> FunctionsToRemove; + SmallPtrSet<const Function *, 16> *DNR) { + SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove; // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { CallGraphNode *CGN = I->second; - if (Function *F = CGN ? CGN->getFunction() : 0) { - // If the only remaining users of the function are dead constants, remove - // them. - F->removeDeadConstantUsers(); - - if (DNR && DNR->count(F)) - continue; + if (CGN->getFunction() == 0) + continue; + + Function *F = CGN->getFunction(); + + // If the only remaining users of the function are dead constants, remove + // them. + F->removeDeadConstantUsers(); + + if (DNR && DNR->count(F)) + continue; + if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && + !F->hasAvailableExternallyLinkage()) + continue; + if (!F->use_empty()) + continue; + + // Remove any call graph edges from the function to its callees. + CGN->removeAllCalledFunctions(); + + // Remove any edges from the external node to the function's call graph + // node. These edges might have been made irrelegant due to + // optimization of the program. + CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); - if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) && - F->use_empty()) { - - // Remove any call graph edges from the function to its callees. - CGN->removeAllCalledFunctions(); - - // Remove any edges from the external node to the function's call graph - // node. These edges might have been made irrelegant due to - // optimization of the program. - CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); - - // Removing the node for callee from the call graph and delete it. - FunctionsToRemove.insert(CGN); - } - } + // Removing the node for callee from the call graph and delete it. + FunctionsToRemove.insert(CGN); } // Now that we know which functions to delete, do so. We didn't want to do // this inline, because that would invalidate our CallGraph::iterator // objects. :( + // + // Note that it doesn't matter that we are iterating over a non-stable set + // here to do this, it doesn't matter which order the functions are deleted + // in. bool Changed = false; - for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(), - E = FunctionsToRemove.end(); I != E; ++I) { + for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(), + E = FunctionsToRemove.end(); I != E; ++I) { resetCachedCostInfo((*I)->getFunction()); delete CG.removeFunctionFromModule(*I); ++NumDeleted; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 5093ae9..e3c3c67 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <fstream> #include <set> @@ -86,7 +87,7 @@ void InternalizePass::LoadFile(const char *Filename) { // Load the APIFile... std::ifstream In(Filename); if (!In.good()) { - cerr << "WARNING: Internalize couldn't load file '" << Filename + errs() << "WARNING: Internalize couldn't load file '" << Filename << "'! Continuing as if it's empty.\n"; return; // Just continue as if the file were empty } @@ -101,7 +102,7 @@ void InternalizePass::LoadFile(const char *Filename) { bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable<CallGraph>(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; - + if (ExternalNames.empty()) { // Return if we're not in 'all but main' mode and have no external api if (!AllButMain) @@ -131,12 +132,14 @@ bool InternalizePass::runOnModule(Module &M) { if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); Changed = true; ++NumFunctions; - DOUT << "Internalizing func " << I->getName() << "\n"; + DEBUG(errs() << "Internalizing func " << I->getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement // attribute((used)). + // FIXME: Shouldn't this just filter on llvm.metadata section?? ExternalNames.insert("llvm.used"); + ExternalNames.insert("llvm.compiler.used"); // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) @@ -158,7 +161,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; - DOUT << "Internalized gvar " << I->getName() << "\n"; + DEBUG(errs() << "Internalized gvar " << I->getName() << "\n"); } // Mark all aliases that are not in the api as internal as well. @@ -169,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; - DOUT << "Internalized alias " << I->getName() << "\n"; + DEBUG(errs() << "Internalized alias " << I->getName() << "\n"); } return Changed; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 0c65443..02ac3bb 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -20,7 +20,7 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/Scalar.h" @@ -33,23 +33,19 @@ using namespace llvm; STATISTIC(NumExtracted, "Number of loops extracted"); namespace { - // FIXME: This is not a function pass, but the PassManager doesn't allow - // Module passes to require FunctionPasses, so we can't get loop info if we're - // not a function pass. - struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass { + struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass { static char ID; // Pass identification, replacement for typeid unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : FunctionPass(&ID), NumLoops(numLoops) {} + : LoopPass(&ID), NumLoops(numLoops) {} - virtual bool runOnFunction(Function &F); + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); AU.addRequired<DominatorTree>(); - AU.addRequired<LoopInfo>(); } }; } @@ -73,68 +69,50 @@ Y("loop-extract-single", "Extract at most one loop into a new function"); // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. // -FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnFunction(Function &F) { - LoopInfo &LI = getAnalysis<LoopInfo>(); - - // If this function has no loops, there is nothing to do. - if (LI.empty()) +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { + // Only visit top-level loops. + if (L->getParentLoop()) return false; DominatorTree &DT = getAnalysis<DominatorTree>(); - - // If there is more than one top-level loop in this function, extract all of - // the loops. bool Changed = false; - if (LI.end()-LI.begin() > 1) { - for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, *i) != 0; - ++NumExtracted; - } - } else { - // Otherwise there is exactly one top-level loop. If this function is more - // than a minimal wrapper around the loop, extract the loop. - Loop *TLL = *LI.begin(); - bool ShouldExtractLoop = false; - - // Extract the loop if the entry block doesn't branch to the loop header. - TerminatorInst *EntryTI = F.getEntryBlock().getTerminator(); - if (!isa<BranchInst>(EntryTI) || - !cast<BranchInst>(EntryTI)->isUnconditional() || - EntryTI->getSuccessor(0) != TLL->getHeader()) - ShouldExtractLoop = true; - else { - // Check to see if any exits from the loop are more than just return - // blocks. - SmallVector<BasicBlock*, 8> ExitBlocks; - TLL->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { - ShouldExtractLoop = true; - break; - } - } - if (ShouldExtractLoop) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, TLL) != 0; - ++NumExtracted; - } else { - // Okay, this function is a minimal container around the specified loop. - // If we extract the loop, we will continue to just keep extracting it - // infinitely... so don't extract it. However, if the loop contains any - // subloops, extract them. - for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, *i) != 0; - ++NumExtracted; + // If there is more than one top-level loop in this function, extract all of + // the loops. Otherwise there is exactly one top-level loop; in this case if + // this function is more than a minimal wrapper around the loop, extract + // the loop. + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = + L->getHeader()->getParent()->getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != L->getHeader()) + ShouldExtractLoop = true; + else { + // Check to see if any exits from the loop are more than just return + // blocks. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { + ShouldExtractLoop = true; + break; } + } + if (ShouldExtractLoop) { + if (NumLoops == 0) return Changed; + --NumLoops; + if (ExtractLoop(DT, L) != 0) { + Changed = true; + // After extraction, the loop is replaced by a function call, so + // we shouldn't try to run any more loop passes on it. + LPM.deleteLoopFromQueue(L); } + ++NumExtracted; } return Changed; @@ -143,7 +121,7 @@ bool LoopExtractor::runOnFunction(Function &F) { // createSingleLoopExtractorPass - This pass extracts one natural loop from the // program into a function if it can. This is used by bugpoint. // -FunctionPass *llvm::createSingleLoopExtractorPass() { +Pass *llvm::createSingleLoopExtractorPass() { return new SingleLoopExtractor(); } @@ -193,8 +171,8 @@ void BlockExtractorPass::LoadFile(const char *Filename) { // Load the BlockFile... std::ifstream In(Filename); if (!In.good()) { - cerr << "WARNING: BlockExtractor couldn't load file '" << Filename - << "'!\n"; + errs() << "WARNING: BlockExtractor couldn't load file '" << Filename + << "'!\n"; return; } while (In) { diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index dfc040b..55194b3 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -39,6 +39,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" @@ -200,7 +201,7 @@ bool LowerSetJmp::runOnModule(Module& M) { // This function is always successful, unless it isn't. bool LowerSetJmp::doInitialization(Module& M) { - const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); const Type *SBPPTy = PointerType::getUnqual(SBPTy); // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for @@ -208,33 +209,40 @@ bool LowerSetJmp::doInitialization(Module& M) // void __llvm_sjljeh_init_setjmpmap(void**) InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", - Type::VoidTy, SBPPTy, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); // void __llvm_sjljeh_destroy_setjmpmap(void**) DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", - Type::VoidTy, SBPPTy, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", - Type::VoidTy, SBPPTy, SBPTy, - Type::Int32Ty, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, SBPTy, + Type::getInt32Ty(M.getContext()), + (Type *)0); // void __llvm_sjljeh_throw_longjmp(int*, int) ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", - Type::VoidTy, SBPTy, Type::Int32Ty, + Type::getVoidTy(M.getContext()), SBPTy, + Type::getInt32Ty(M.getContext()), (Type *)0); // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) TryCatchLJ = M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", - Type::Int32Ty, SBPPTy, (Type *)0); + Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0); // bool __llvm_sjljeh_is_longjmp_exception() IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", - Type::Int1Ty, (Type *)0); + Type::getInt1Ty(M.getContext()), + (Type *)0); // int __llvm_sjljeh_get_longjmp_value() GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", - Type::Int32Ty, (Type *)0); + Type::getInt32Ty(M.getContext()), + (Type *)0); return true; } @@ -257,7 +265,8 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) { // throwing the exception for us. void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) { - const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type* SBPTy = + Type::getInt8PtrTy(Inst->getContext()); // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the // same parameters as "longjmp", except that the buffer is cast to a @@ -278,7 +287,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) if (SVP.first) BranchInst::Create(SVP.first->getParent(), Inst); else - new UnwindInst(Inst); + new UnwindInst(Inst->getContext(), Inst); // Remove all insts after the branch/unwind inst. Go from back to front to // avoid replaceAllUsesWith if possible. @@ -309,7 +318,8 @@ AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) assert(Inst && "Couldn't find even ONE instruction in entry block!"); // Fill in the alloca and call to initialize the SJ map. - const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type *SBPTy = + Type::getInt8PtrTy(Func->getContext()); AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); CallInst::Create(InitSJMap, Map, "", Inst); return SJMap[Func] = Map; @@ -324,12 +334,13 @@ BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) // The basic block we're going to jump to if we need to rethrow the // exception. - BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func); + BasicBlock* Rethrow = + BasicBlock::Create(Func->getContext(), "RethrowExcept", Func); // Fill in the "Rethrow" BB with a call to rethrow the exception. This // is the last instruction in the BB since at this point the runtime // should exit this function and go to the next function. - new UnwindInst(Rethrow); + new UnwindInst(Func->getContext(), Rethrow); return RethrowBBMap[Func] = Rethrow; } @@ -340,7 +351,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, { if (SwitchValMap[Func].first) return SwitchValMap[Func]; - BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func); + BasicBlock* LongJmpPre = + BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func); // Keep track of the preliminary basic block for some of the other // transformations. @@ -352,7 +364,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, // The "decision basic block" gets the number associated with the // setjmp call returning to switch on and the value returned by // longjmp. - BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func); + BasicBlock* DecisionBB = + BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func); BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); @@ -375,12 +388,13 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) Function* Func = ABlock->getParent(); // Add this setjmp to the setjmp map. - const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type* SBPTy = + Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); std::vector<Value*> Args = make_vector<Value*>(GetSetJmpMap(Func), BufPtr, - ConstantInt::get(Type::Int32Ty, + ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++), 0); CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst); @@ -424,14 +438,17 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // This PHI node will be in the new block created from the // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst); + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + "SetJmpReturn", Inst); // Coming from a call to setjmp, the return is 0. - PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock); + PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), + ABlock); // Add the case for this setjmp's number... SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); - SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1), + SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), + SetJmpIDMap[Func] - 1), SetJmpContBlock); // Value coming from the handling of the exception. @@ -503,7 +520,8 @@ void LowerSetJmp::visitInvokeInst(InvokeInst& II) BasicBlock* ExceptBB = II.getUnwindDest(); Function* Func = BB->getParent(); - BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func); + BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), + "InvokeExcept", Func); // If this is a longjmp exception, then branch to the preliminary BB of // the longjmp exception handling. Otherwise, go to the old exception. diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5693cc0..13bbf9c 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -47,11 +47,14 @@ #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include <map> #include <vector> using namespace llvm; @@ -61,7 +64,7 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass { static char ID; // Pass identification, replacement for typeid - MergeFunctions() : ModulePass((intptr_t)&ID) {} + MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); }; @@ -127,7 +130,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return false; default: - assert(0 && "Unknown type!"); + llvm_unreachable("Unknown type!"); return false; case Type::PointerTyID: { @@ -185,7 +188,8 @@ static bool isEquivalentOperation(const Instruction *I1, const Instruction *I2) { if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || - !isEquivalentType(I1->getType(), I2->getType())) + !isEquivalentType(I1->getType(), I2->getType()) || + !I1->hasSameSubclassOptionalData(I2)) return false; // We have two instructions of identical opcode and #operands. Check to see @@ -449,6 +453,7 @@ static LinkageCategory categorize(const Function *F) { switch (F->getLinkage()) { case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: return Internal; case GlobalValue::WeakAnyLinkage: @@ -468,14 +473,14 @@ static LinkageCategory categorize(const Function *F) { return ExternalStrong; } - assert(0 && "Unknown LinkageType."); + llvm_unreachable("Unknown LinkageType."); return ExternalWeak; } static void ThunkGToF(Function *F, Function *G) { Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); - BasicBlock *BB = BasicBlock::Create("", NewG); + BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); std::vector<Value *> Args; unsigned i = 0; @@ -494,13 +499,13 @@ static void ThunkGToF(Function *F, Function *G) { CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); - if (NewG->getReturnType() == Type::VoidTy) { - ReturnInst::Create(BB); + if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) { + ReturnInst::Create(F->getContext(), BB); } else if (CI->getType() != NewG->getReturnType()) { Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); - ReturnInst::Create(BCI, BB); + ReturnInst::Create(F->getContext(), BCI, BB); } else { - ReturnInst::Create(CI, BB); + ReturnInst::Create(F->getContext(), CI, BB); } NewG->copyAttributesFrom(G); @@ -574,22 +579,22 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { case Internal: switch (catG) { case ExternalStrong: - assert(0); + llvm_unreachable(0); // fall-through case ExternalWeak: - if (F->hasAddressTaken()) + if (F->hasAddressTaken()) ThunkGToF(F, G); else AliasGToF(F, G); - break; + break; case Internal: { bool addrTakenF = F->hasAddressTaken(); bool addrTakenG = G->hasAddressTaken(); if (!addrTakenF && addrTakenG) { std::swap(FnVec[i], FnVec[j]); std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + std::swap(addrTakenF, addrTakenG); + } if (addrTakenF && addrTakenG) { ThunkGToF(F, G); @@ -597,7 +602,7 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { assert(!addrTakenG); AliasGToF(F, G); } - } break; + } break; } break; } @@ -629,19 +634,19 @@ bool MergeFunctions::runOnModule(Module &M) { bool LocalChanged; do { LocalChanged = false; - DOUT << "size: " << FnMap.size() << "\n"; + DEBUG(errs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; - DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n"; + DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); for (int i = 0, e = FnVec.size(); i != e; ++i) { for (int j = i + 1; j != e; ++j) { bool isEqual = equals(FnVec[i], FnVec[j]); - DOUT << " " << FnVec[i]->getName() - << (isEqual ? " == " : " != ") - << FnVec[j]->getName() << "\n"; + DEBUG(errs() << " " << FnVec[i]->getName() + << (isEqual ? " == " : " != ") + << FnVec[j]->getName() << "\n"); if (isEqual) { if (fold(FnVec, i, j)) { diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 73ec9c1..8f858d3 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -48,7 +48,8 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... BasicBlock* entryBlock = F->begin(); - if (!isa<BranchInst>(entryBlock->getTerminator())) + BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) return 0; BasicBlock* returnBlock = 0; diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 2b52f46..daf81e9 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -19,6 +19,7 @@ #include "llvm/CallGraphSCCPass.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" @@ -40,7 +41,7 @@ namespace { PruneEH() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + bool runOnSCC(std::vector<CallGraphNode *> &SCC); bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); @@ -54,7 +55,7 @@ X("prune-eh", "Remove unused exception handling info"); Pass *llvm::createPruneEHPass() { return new PruneEH(); } -bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; CallGraph &CG = getAnalysis<CallGraph>(); bool MadeChange = false; @@ -164,9 +165,6 @@ bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) { // function if we have invokes to non-unwinding functions or code after calls to // no-return functions. bool PruneEH::SimplifyFunction(Function *F) { - CallGraph &CG = getAnalysis<CallGraph>(); - CallGraphNode *CGN = CG[F]; - bool MadeChange = false; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) @@ -180,14 +178,13 @@ bool PruneEH::SimplifyFunction(Function *F) { Call->setAttributes(II->getAttributes()); // Anything that used the value produced by the invoke instruction - // now uses the value produced by the call instruction. + // now uses the value produced by the call instruction. Note that we + // do this even for void functions and calls with no uses so that the + // callgraph edge is updated. II->replaceAllUsesWith(Call); BasicBlock *UnwindBlock = II->getUnwindDest(); UnwindBlock->removePredecessor(II->getParent()); - // Fix up the call graph. - CGN->replaceCallSite(II, Call); - // Insert a branch to the normal destination right before the // invoke. BranchInst::Create(II->getNormalDest(), II); @@ -214,7 +211,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // Remove the uncond branch and add an unreachable. BB->getInstList().pop_back(); - new UnreachableInst(BB); + new UnreachableInst(BB->getContext(), BB); DeleteBasicBlock(New); // Delete the new BB. MadeChange = true; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 9900368..4c1f26d 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" @@ -69,7 +70,6 @@ ModulePass *llvm::createRaiseAllocationsPass() { // function into the appropriate instruction. // void RaiseAllocations::doInitialization(Module &M) { - // Get Malloc and free prototypes if they exist! MallocFunc = M.getFunction("malloc"); if (MallocFunc) { @@ -77,22 +77,27 @@ void RaiseAllocations::doInitialization(Module &M) { // Get the expected prototype for malloc const FunctionType *Malloc1Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), - std::vector<const Type*>(1, Type::Int64Ty), false); + FunctionType::get(Type::getInt8PtrTy(M.getContext()), + std::vector<const Type*>(1, + Type::getInt64Ty(M.getContext())), false); // Chck to see if we got the expected malloc if (TyWeHave != Malloc1Type) { // Check to see if the prototype is wrong, giving us i8*(i32) * malloc // This handles the common declaration of: 'void *malloc(unsigned);' const FunctionType *Malloc2Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), - std::vector<const Type*>(1, Type::Int32Ty), false); + FunctionType::get(PointerType::getUnqual( + Type::getInt8Ty(M.getContext())), + std::vector<const Type*>(1, + Type::getInt32Ty(M.getContext())), false); if (TyWeHave != Malloc2Type) { // Check to see if the prototype is missing, giving us // i8*(...) * malloc // This handles the common declaration of: 'void *malloc();' const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true); + FunctionType::get(PointerType::getUnqual( + Type::getInt8Ty(M.getContext())), + true); if (TyWeHave != Malloc3Type) // Give up MallocFunc = 0; @@ -105,19 +110,24 @@ void RaiseAllocations::doInitialization(Module &M) { const FunctionType* TyWeHave = FreeFunc->getFunctionType(); // Get the expected prototype for void free(i8*) - const FunctionType *Free1Type = FunctionType::get(Type::VoidTy, - std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)), false); + const FunctionType *Free1Type = + FunctionType::get(Type::getVoidTy(M.getContext()), + std::vector<const Type*>(1, PointerType::getUnqual( + Type::getInt8Ty(M.getContext()))), + false); if (TyWeHave != Free1Type) { // Check to see if the prototype was forgotten, giving us // void (...) * free // This handles the common forward declaration of: 'void free();' - const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true); + const FunctionType* Free2Type = + FunctionType::get(Type::getVoidTy(M.getContext()), true); if (TyWeHave != Free2Type) { // One last try, check to see if we can find free as // int (...)* free. This handles the case where NOTHING was declared. - const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true); + const FunctionType* Free3Type = + FunctionType::get(Type::getInt32Ty(M.getContext()), true); if (TyWeHave != Free3Type) { // Give up. @@ -137,7 +147,7 @@ void RaiseAllocations::doInitialization(Module &M) { bool RaiseAllocations::runOnModule(Module &M) { // Find the malloc/free prototypes... doInitialization(M); - + bool Changed = false; // First, process all of the malloc calls... @@ -159,12 +169,15 @@ bool RaiseAllocations::runOnModule(Module &M) { // If no prototype was provided for malloc, we may need to cast the // source size. - if (Source->getType() != Type::Int32Ty) + if (Source->getType() != Type::getInt32Ty(M.getContext())) Source = - CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/, + CastInst::CreateIntegerCast(Source, + Type::getInt32Ty(M.getContext()), + false/*ZExt*/, "MallocAmtCast", I); - MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I); + MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), + Source, "", I); MI->takeName(I); I->replaceAllUsesWith(MI); @@ -216,7 +229,7 @@ bool RaiseAllocations::runOnModule(Module &M) { Value *Source = *CS.arg_begin(); if (!isa<PointerType>(Source->getType())) Source = new IntToPtrInst(Source, - PointerType::getUnqual(Type::Int8Ty), + Type::getInt8PtrTy(M.getContext()), "FreePtrCast", I); new FreeInst(Source, I); @@ -226,7 +239,7 @@ bool RaiseAllocations::runOnModule(Module &M) { BranchInst::Create(II->getNormalDest(), I); // Delete the old call site - if (I->getType() != Type::VoidTy) + if (I->getType() != Type::getVoidTy(M.getContext())) I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 046e044..77d44b2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -24,18 +24,18 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; namespace { - class VISIBILITY_HIDDEN StripSymbols : public ModulePass { + class StripSymbols : public ModulePass { bool OnlyDebugInfo; public: static char ID; // Pass identification, replacement for typeid @@ -49,7 +49,7 @@ namespace { } }; - class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass { + class StripNonDebugSymbols : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() @@ -62,7 +62,7 @@ namespace { } }; - class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass { + class StripDebugDeclare : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() @@ -138,7 +138,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { Value *V = VI->getValue(); ++VI; if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) { - if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg")) // Set name to "", removing from symbol table! V->setName(""); } @@ -156,43 +156,37 @@ static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { } /// Find values that are marked as llvm.used. -void findUsedValues(Module &M, - SmallPtrSet<const GlobalValue*, 8>& llvmUsedValues) { - if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) { - llvmUsedValues.insert(LLVMUsed); - // Collect values that are preserved as per explicit request. - // llvm.used is used to list these values. - if (ConstantArray *Inits = - dyn_cast<ConstantArray>(LLVMUsed->getInitializer())) { - for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { - if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i))) - llvmUsedValues.insert(GV); - else if (ConstantExpr *CE = - dyn_cast<ConstantExpr>(Inits->getOperand(i))) - if (CE->getOpcode() == Instruction::BitCast) - if (GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0))) - llvmUsedValues.insert(GV); - } - } - } +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet<const GlobalValue*, 8> &UsedValues) { + if (LLVMUsed == 0) return; + UsedValues.insert(LLVMUsed); + + ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); } /// StripSymbolNames - Strip symbol names. -bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; - findUsedValues(M, llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues); for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) - if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) - if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); } @@ -206,169 +200,58 @@ bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { // StripDebugInfo - Strip debug info in the module if it exists. // To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and // llvm.dbg.region.end calls, and any globals they point to if now dead. -bool StripDebugInfo(Module &M) { - - SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; - findUsedValues(M, llvmUsedValues); - - SmallVector<GlobalVariable *, 2> CUs; - SmallVector<GlobalVariable *, 4> GVs; - SmallVector<GlobalVariable *, 4> SPs; - CollectDebugInfoAnchors(M, CUs, GVs, SPs); - // These anchors use LinkOnce linkage so that the optimizer does not - // remove them accidently. Set InternalLinkage for all these debug - // info anchors. - for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(), - E = GVs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(), - E = SPs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - - - // Delete all dbg variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - GlobalVariable *GV = dyn_cast<GlobalVariable>(I); - if (!GV) continue; - if (!GV->use_empty() && llvmUsedValues.count(I) == 0) { - if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) { - GV->replaceAllUsesWith(UndefValue::get(GV->getType())); - } - } - } +static bool StripDebugInfo(Module &M) { + // Remove all of the calls to the debugger intrinsics, and remove them from + // the module. Function *FuncStart = M.getFunction("llvm.dbg.func.start"); Function *StopPoint = M.getFunction("llvm.dbg.stoppoint"); Function *RegionStart = M.getFunction("llvm.dbg.region.start"); Function *RegionEnd = M.getFunction("llvm.dbg.region.end"); Function *Declare = M.getFunction("llvm.dbg.declare"); - std::vector<Constant*> DeadConstants; - - // Remove all of the calls to the debugger intrinsics, and remove them from - // the module. if (FuncStart) { while (!FuncStart->use_empty()) { CallInst *CI = cast<CallInst>(FuncStart->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast<Constant>(Arg)) - DeadConstants.push_back(C); } FuncStart->eraseFromParent(); } if (StopPoint) { while (!StopPoint->use_empty()) { CallInst *CI = cast<CallInst>(StopPoint->use_back()); - Value *Arg = CI->getOperand(3); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast<Constant>(Arg)) - DeadConstants.push_back(C); } StopPoint->eraseFromParent(); } if (RegionStart) { while (!RegionStart->use_empty()) { CallInst *CI = cast<CallInst>(RegionStart->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast<Constant>(Arg)) - DeadConstants.push_back(C); } RegionStart->eraseFromParent(); } if (RegionEnd) { while (!RegionEnd->use_empty()) { CallInst *CI = cast<CallInst>(RegionEnd->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast<Constant>(Arg)) - DeadConstants.push_back(C); } RegionEnd->eraseFromParent(); } if (Declare) { while (!Declare->use_empty()) { CallInst *CI = cast<CallInst>(Declare->use_back()); - Value *Arg1 = CI->getOperand(1); - Value *Arg2 = CI->getOperand(2); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg1->use_empty()) { - if (Constant *C = dyn_cast<Constant>(Arg1)) - DeadConstants.push_back(C); - else - RecursivelyDeleteTriviallyDeadInstructions(Arg1); - } - if (Arg2->use_empty()) - if (Constant *C = dyn_cast<Constant>(Arg2)) - DeadConstants.push_back(C); } Declare->eraseFromParent(); } - // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce - // but since we are removing all debug information, make them internal now. - // FIXME: Use private linkage maybe? - if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units")) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms")) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables")) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - // Delete all dbg variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - GlobalVariable *GV = dyn_cast<GlobalVariable>(I); - if (!GV) continue; - if (GV->use_empty() && llvmUsedValues.count(I) == 0 - && (!GV->hasSection() - || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0)) - DeadConstants.push_back(GV); - } - - if (DeadConstants.empty()) - return false; + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); + if (NMD) + NMD->eraseFromParent(); - // Delete any internal globals that were only used by the debugger intrinsics. - while (!DeadConstants.empty()) { - Constant *C = DeadConstants.back(); - DeadConstants.pop_back(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->hasLocalLinkage()) - RemoveDeadConstant(GV); - } - else - RemoveDeadConstant(C); - } - - // Remove all llvm.dbg types. - TypeSymbolTable &ST = M.getTypeSymbolTable(); - for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) { - if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9)) - ST.remove(TI++); - else - ++TI; - } - + // Remove dead metadata. + M.getContext().RemoveDeadMetadata(); return true; } @@ -414,8 +297,7 @@ bool StripDebugDeclare::runOnModule(Module &M) { I != E; ++I) { GlobalVariable *GV = dyn_cast<GlobalVariable>(I); if (!GV) continue; - if (GV->use_empty() && GV->hasName() - && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0) + if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable")) DeadConstants.push_back(GV); } diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 9f54388..4442820 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -23,6 +23,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CallGraphSCCPass.h" #include "llvm/Instructions.h" @@ -34,6 +35,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); @@ -47,15 +49,15 @@ namespace { CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); static char ID; // Pass identification, replacement for typeid SRETPromotion() : CallGraphSCCPass(&ID) {} private: - bool PromoteReturn(CallGraphNode *CGN); + CallGraphNode *PromoteReturn(CallGraphNode *CGN); bool isSafeToUpdateAllCallers(Function *F); Function *cloneFunctionBody(Function *F, const StructType *STy); - void updateCallSites(Function *F, Function *NF); + CallGraphNode *updateCallSites(Function *F, Function *NF); bool nestedStructType(const StructType *STy); }; } @@ -68,49 +70,54 @@ Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); } -bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { bool Changed = false; for (unsigned i = 0, e = SCC.size(); i != e; ++i) - Changed |= PromoteReturn(SCC[i]); + if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) { + SCC[i] = NewNode; + Changed = true; + } return Changed; } /// PromoteReturn - This method promotes function that uses StructRet paramater -/// into a function that uses mulitple return value. -bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { +/// into a function that uses multiple return values. +CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { Function *F = CGN->getFunction(); if (!F || F->isDeclaration() || !F->hasLocalLinkage()) - return false; + return 0; // Make sure that function returns struct. if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) - return false; + return 0; - DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n"; + DEBUG(errs() << "SretPromotion: Looking at sret function " + << F->getName() << "\n"); - assert (F->getReturnType() == Type::VoidTy && "Invalid function return type"); + assert(F->getReturnType() == Type::getVoidTy(F->getContext()) && + "Invalid function return type"); Function::arg_iterator AI = F->arg_begin(); const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); - assert (FArgType && "Invalid sret parameter type"); + assert(FArgType && "Invalid sret parameter type"); const llvm::StructType *STy = dyn_cast<StructType>(FArgType->getElementType()); - assert (STy && "Invalid sret parameter element type"); + assert(STy && "Invalid sret parameter element type"); // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { - DOUT << "SretPromotion: Not all callers can be updated\n"; + DEBUG(errs() << "SretPromotion: Not all callers can be updated\n"); NumRejectedSRETUses++; - return false; + return 0; } - DOUT << "SretPromotion: sret argument will be promoted\n"; + DEBUG(errs() << "SretPromotion: sret argument will be promoted\n"); NumSRET++; // [1] Replace use of sret parameter - AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv", - F->getEntryBlock().begin()); + AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", + F->getEntryBlock().begin()); Value *NFirstArg = F->arg_begin(); NFirstArg->replaceAllUsesWith(TheAlloca); @@ -121,7 +128,7 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { ++BI; if (isa<ReturnInst>(I)) { Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); - ReturnInst *NR = ReturnInst::Create(NV, I); + ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); I->replaceAllUsesWith(NR); I->eraseFromParent(); } @@ -131,11 +138,13 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { Function *NF = cloneFunctionBody(F, STy); // [4] Update all call sites to use new function - updateCallSites(F, NF); + CallGraphNode *NF_CFN = updateCallSites(F, NF); - F->eraseFromParent(); - getAnalysis<CallGraph>().changeFunction(F, NF); - return true; + CallGraph &CG = getAnalysis<CallGraph>(); + NF_CFN->stealCalledFunctionsFrom(CG[F]); + + delete CG.removeFunctionFromModule(F); + return NF_CFN; } // Check if it is ok to perform this promotion. @@ -243,23 +252,26 @@ Function *SRETPromotion::cloneFunctionBody(Function *F, Function::arg_iterator NI = NF->arg_begin(); ++I; while (I != E) { - I->replaceAllUsesWith(NI); - NI->takeName(I); - ++I; - ++NI; + I->replaceAllUsesWith(NI); + NI->takeName(I); + ++I; + ++NI; } return NF; } /// updateCallSites - Update all sites that call F to use NF. -void SRETPromotion::updateCallSites(Function *F, Function *NF) { +CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { CallGraph &CG = getAnalysis<CallGraph>(); SmallVector<Value*, 16> Args; // Attributes - Keep track of the parameter attributes for the arguments. SmallVector<AttributeWithIndex, 8> ArgAttrsVec; + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + while (!F->use_empty()) { CallSite CS = CallSite::get(*F->use_begin()); Instruction *Call = CS.getInstruction(); @@ -309,8 +321,10 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) { New->takeName(Call); // Update the callgraph to know that the callsite has been transformed. - CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); - + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->removeCallEdgeFor(Call); + CalleeNode->addCalledFunction(New, NF_CGN); + // Update all users of sret parameter to extract value using extractvalue. for (Value::use_iterator UI = FirstCArg->use_begin(), UE = FirstCArg->use_end(); UI != UE; ) { @@ -318,24 +332,25 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) { CallInst *C2 = dyn_cast<CallInst>(U2); if (C2 && (C2 == Call)) continue; - else if (GetElementPtrInst *UGEP = dyn_cast<GetElementPtrInst>(U2)) { - ConstantInt *Idx = dyn_cast<ConstantInt>(UGEP->getOperand(2)); - assert (Idx && "Unexpected getelementptr index!"); - Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), - "evi", UGEP); - while(!UGEP->use_empty()) { - // isSafeToUpdateAllCallers has checked that all GEP uses are - // LoadInsts - LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); - L->replaceAllUsesWith(GR); - L->eraseFromParent(); - } - UGEP->eraseFromParent(); + + GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); + ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); + Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), + "evi", UGEP); + while(!UGEP->use_empty()) { + // isSafeToUpdateAllCallers has checked that all GEP uses are + // LoadInsts + LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); + L->replaceAllUsesWith(GR); + L->eraseFromParent(); } - else assert( 0 && "Unexpected sret parameter use"); + UGEP->eraseFromParent(); + continue; } Call->eraseFromParent(); } + + return NF_CGN; } /// nestedStructType - Return true if STy includes any @@ -344,7 +359,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) { unsigned Num = STy->getNumElements(); for (unsigned i = 0; i < Num; i++) { const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && Ty != Type::VoidTy) + if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext())) return true; } return false; diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp index 2bd9809..eb8f225 100644 --- a/lib/Transforms/Instrumentation/BlockProfiling.cpp +++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp @@ -19,12 +19,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "RSProfiling.h" #include "ProfilingUtils.h" @@ -52,8 +51,8 @@ ModulePass *llvm::createFunctionProfilerPass() { bool FunctionProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert function profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert function profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } @@ -62,10 +61,11 @@ bool FunctionProfiler::runOnModule(Module &M) { if (!I->isDeclaration()) ++NumFunctions; - const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), + NumFunctions); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "FuncProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "FuncProfCounters"); // Instrument all of the functions... unsigned i = 0; @@ -98,26 +98,29 @@ ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); } bool BlockProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert block profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert block profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } unsigned NumBlocks = 0; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - NumBlocks += I->size(); + if (!I->isDeclaration()) + NumBlocks += I->size(); - const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "BlockProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "BlockProfCounters"); // Instrument all of the blocks... unsigned i = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->isDeclaration()) continue; for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) // Insert counter at the start of the block IncrementCounterInBlock(BB, i++, Counters); + } // Add the initialization call to main. InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters); diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index d7c518d..494928e 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMInstrumentation BlockProfiling.cpp EdgeProfiling.cpp + OptimalEdgeProfiling.cpp ProfilingUtils.cpp RSProfiling.cpp ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 0831f3b..b9cb275 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -16,25 +16,30 @@ // number of counters inserted. // //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "insert-edge-profiling" #include "ProfilingUtils.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; +STATISTIC(NumEdgesInserted, "The # of edges inserted."); + namespace { class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid EdgeProfiler() : ModulePass(&ID) {} + + virtual const char *getPassName() const { + return "Edge Profiler"; + } }; } @@ -47,14 +52,17 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } bool EdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert edge profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } std::set<BasicBlock*> BlocksToInstrument; unsigned NumEdges = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + // Reserve space for (0,entry) edge. + ++NumEdges; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { // Keep track of which blocks need to be instrumented. We don't want to // instrument blocks that are added as the result of breaking critical @@ -62,15 +70,20 @@ bool EdgeProfiler::runOnModule(Module &M) { BlocksToInstrument.insert(BB); NumEdges += BB->getTerminator()->getNumSuccessors(); } + } - const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "EdgeProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "EdgeProfCounters"); + NumEdgesInserted = NumEdges; // Instrument all of the edges... unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + // Create counter for (0,entry) edge. + IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks // Okay, we have to add a counter of each outgoing edge. If the @@ -93,6 +106,7 @@ bool EdgeProfiler::runOnModule(Module &M) { } } } + } // Add the initialization call to main. InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h new file mode 100644 index 0000000..2951dbc --- /dev/null +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -0,0 +1,95 @@ +//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This module privides means for calculating a maximum spanning tree for a +// given set of weighted edges. The type parameter T is the type of a node. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H + +#include "llvm/ADT/EquivalenceClasses.h" +#include <vector> +#include <algorithm> + +namespace llvm { + + /// MaximumSpanningTree - A MST implementation. + /// The type parameter T determines the type of the nodes of the graph. + template <typename T> + class MaximumSpanningTree { + + // A comparing class for comparing weighted edges. + template <typename CT> + struct EdgeWeightCompare { + bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, + typename MaximumSpanningTree<CT>::EdgeWeight Y) const { + if (X.second > Y.second) return true; + if (X.second < Y.second) return false; + return false; + } + }; + + public: + typedef std::pair<const T*, const T*> Edge; + typedef std::pair<Edge, double> EdgeWeight; + typedef std::vector<EdgeWeight> EdgeWeights; + protected: + typedef std::vector<Edge> MaxSpanTree; + + MaxSpanTree MST; + + public: + static char ID; // Class identification, replacement for typeinfo + + /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a + /// spanning tree. + MaximumSpanningTree(EdgeWeights &EdgeVector) { + + std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>()); + + // Create spanning tree, Forest contains a special data structure + // that makes checking if two nodes are already in a common (sub-)tree + // fast and cheap. + EquivalenceClasses<const T*> Forest; + for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), + EWe = EdgeVector.end(); EWi != EWe; ++EWi) { + Edge e = (*EWi).first; + + Forest.insert(e.first); + Forest.insert(e.second); + } + + // Iterate over the sorted edges, biggest first. + for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), + EWe = EdgeVector.end(); EWi != EWe; ++EWi) { + Edge e = (*EWi).first; + + if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) { + Forest.unionSets(e.first, e.second); + // So we know now that the edge is not already in a subtree, so we push + // the edge to the MST. + MST.push_back(e); + } + } + } + + typename MaxSpanTree::iterator begin() { + return MST.begin(); + } + + typename MaxSpanTree::iterator end() { + return MST.end(); + } + }; + +} // End llvm namespace + +#endif diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp new file mode 100644 index 0000000..b2e6747 --- /dev/null +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -0,0 +1,219 @@ +//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments the specified program with counters for edge profiling. +// Edge profiling can give a reasonable approximation of the hot paths through a +// program, and is used for a wide variety of program transformations. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "insert-optimal-edge-profiling" +#include "ProfilingUtils.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" +#include "MaximumSpanningTree.h" +#include <set> +using namespace llvm; + +STATISTIC(NumEdgesInserted, "The # of edges inserted."); + +namespace { + class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass { + bool runOnModule(Module &M); + public: + static char ID; // Pass identification, replacement for typeid + OptimalEdgeProfiler() : ModulePass(&ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(ProfileEstimatorPassID); + AU.addRequired<ProfileInfo>(); + } + + virtual const char *getPassName() const { + return "Optimal Edge Profiler"; + } + }; +} + +char OptimalEdgeProfiler::ID = 0; +static RegisterPass<OptimalEdgeProfiler> +X("insert-optimal-edge-profiling", + "Insert optimal instrumentation for edge profiling"); + +ModulePass *llvm::createOptimalEdgeProfilerPass() { + return new OptimalEdgeProfiler(); +} + +inline static void printEdgeCounter(ProfileInfo::Edge e, + BasicBlock* b, + unsigned i) { + DEBUG(errs() << "--Edge Counter for " << (e) << " in " \ + << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n"); +} + +bool OptimalEdgeProfiler::runOnModule(Module &M) { + Function *Main = M.getFunction("main"); + if (Main == 0) { + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; + return false; // No main, no instrumentation! + } + + // NumEdges counts all the edges that may be instrumented. Later on its + // decided which edges to actually instrument, to achieve optimal profiling. + // For the entry block a virtual edge (0,entry) is reserved, for each block + // with no successors an edge (BB,0) is reserved. These edges are necessary + // to calculate a truly optimal maximum spanning tree and thus an optimal + // instrumentation. + unsigned NumEdges = 0; + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + // Reserve space for (0,entry) edge. + ++NumEdges; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + // Keep track of which blocks need to be instrumented. We don't want to + // instrument blocks that are added as the result of breaking critical + // edges! + if (BB->getTerminator()->getNumSuccessors() == 0) { + // Reserve space for (BB,0) edge. + ++NumEdges; + } else { + NumEdges += BB->getTerminator()->getNumSuccessors(); + } + } + } + + // In the profiling output a counter for each edge is reserved, but only few + // are used. This is done to be able to read back in the profile without + // calulating the maximum spanning tree again, instead each edge counter that + // is not used is initialised with -1 to signal that this edge counter has to + // be calculated from other edge counters on reading the profile info back + // in. + + const Type *Int32 = Type::getInt32Ty(M.getContext()); + const ArrayType *ATy = ArrayType::get(Int32, NumEdges); + GlobalVariable *Counters = + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "OptEdgeProfCounters"); + NumEdgesInserted = 0; + + std::vector<Constant*> Initializer(NumEdges); + Constant* Zero = ConstantInt::get(Int32, 0); + Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + + // Instrument all of the edges not in MST... + unsigned i = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n"); + + // Calculate a Maximum Spanning Tree with the edge weights determined by + // ProfileEstimator. ProfileEstimator also assign weights to the virtual + // edges (0,entry) and (BB,0) (for blocks with no successors) and this + // edges also participate in the maximum spanning tree calculation. + // The third parameter of MaximumSpanningTree() has the effect that not the + // actual MST is returned but the edges _not_ in the MST. + + ProfileInfo::EdgeWeights ECs = + getAnalysisID<ProfileInfo>(ProfileEstimatorPassID, *F).getEdgeWeights(F); + std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); + MaximumSpanningTree<BasicBlock> MST (EdgeVector); + std::stable_sort(MST.begin(),MST.end()); + + // Check if (0,entry) not in the MST. If not, instrument edge + // (IncrementCounterInBlock()) and set the counter initially to zero, if + // the edge is in the MST the counter is initialised to -1. + + BasicBlock *entry = &(F->getEntryBlock()); + ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); + if (!std::binary_search(MST.begin(), MST.end(), edge)) { + printEdgeCounter(edge,entry,i); + IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++; + Initializer[i++] = (Zero); + } else{ + Initializer[i++] = (Uncounted); + } + + // InsertedBlocks contains all blocks that were inserted for splitting an + // edge, this blocks do not have to be instrumented. + DenseSet<BasicBlock*> InsertedBlocks; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + // Check if block was not inserted and thus does not have to be + // instrumented. + if (InsertedBlocks.count(BB)) continue; + + // Okay, we have to add a counter of each outgoing edge not in MST. If + // the outgoing edge is not critical don't split it, just insert the + // counter in the source or destination of the edge. Also, if the block + // has no successors, the virtual edge (BB,0) is processed. + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); + if (!std::binary_search(MST.begin(), MST.end(), edge)) { + printEdgeCounter(edge,BB,i); + IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + Initializer[i++] = (Zero); + } else{ + Initializer[i++] = (Uncounted); + } + } + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + BasicBlock *Succ = TI->getSuccessor(s); + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ); + if (!std::binary_search(MST.begin(), MST.end(), edge)) { + + // If the edge is critical, split it. + bool wasInserted = SplitCriticalEdge(TI, s, this); + Succ = TI->getSuccessor(s); + if (wasInserted) + InsertedBlocks.insert(Succ); + + // Okay, we are guaranteed that the edge is no longer critical. If + // we only have a single successor, insert the counter in this block, + // otherwise insert it in the successor block. + if (TI->getNumSuccessors() == 1) { + // Insert counter at the start of the block + printEdgeCounter(edge,BB,i); + IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + } else { + // Insert counter at the start of the block + printEdgeCounter(edge,Succ,i); + IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++; + } + Initializer[i++] = (Zero); + } else { + Initializer[i++] = (Uncounted); + } + } + } + } + + // Check if the number of edges counted at first was the number of edges we + // considered for instrumentation. + assert(i==NumEdges && "the number of edges in counting array is wrong"); + + // Assing the now completely defined initialiser to the array. + Constant *init = ConstantArray::get(ATy, Initializer); + Counters->setInitializer(init); + + // Add the initialization call to main. + InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters); + return true; +} + diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 48071f1..1679bea 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -18,22 +18,27 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, GlobalValue *Array) { + LLVMContext &Context = MainFn->getContext(); const Type *ArgVTy = - PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); - const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty); + PointerType::getUnqual(Type::getInt8PtrTy(Context)); + const PointerType *UIntPtr = + Type::getInt32PtrTy(Context); Module &M = *MainFn->getParent(); - Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty, - ArgVTy, UIntPtr, Type::Int32Ty, + Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), + Type::getInt32Ty(Context), + ArgVTy, UIntPtr, + Type::getInt32Ty(Context), (Type *)0); // This could force argc and argv into programs that wouldn't otherwise have // them, but instead we just pass null values in. std::vector<Value*> Args(4); - Args[0] = Constant::getNullValue(Type::Int32Ty); + Args[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Args[1] = Constant::getNullValue(ArgVTy); // Skip over any allocas in the entry block. @@ -41,7 +46,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, BasicBlock::iterator InsertPos = Entry->begin(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; - std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty)); + std::vector<Constant*> GEPIndices(2, + Constant::getNullValue(Type::getInt32Ty(Context))); unsigned NumElements = 0; if (Array) { Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], @@ -53,7 +59,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, // pass null. Args[2] = ConstantPointerNull::get(UIntPtr); } - Args[3] = ConstantInt::get(Type::Int32Ty, NumElements); + Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), "newargc", InsertPos); @@ -78,16 +84,18 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. - if (AI->getType() != Type::Int32Ty) { + if (AI->getType() != Type::getInt32Ty(Context)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); AI->replaceAllUsesWith( CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos)); } - opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true); + opcode = CastInst::getCastOpcode(AI, true, + Type::getInt32Ty(Context), true); InitCall->setOperand(1, - CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall)); + CastInst::Create(opcode, AI, Type::getInt32Ty(Context), + "argc.cast", InitCall)); } else { AI->replaceAllUsesWith(InitCall); InitCall->setOperand(1, AI); @@ -104,17 +112,20 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, while (isa<AllocaInst>(InsertPos)) ++InsertPos; + LLVMContext &Context = BB->getContext(); + // Create the getelementptr constant expression std::vector<Constant*> Indices(2); - Indices[0] = Constant::getNullValue(Type::Int32Ty); - Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); + Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], + Indices.size()); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal, - ConstantInt::get(Type::Int32Ty, 1), + ConstantInt::get(Type::getInt32Ty(Context), 1), "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp index b110f4e..3b72260 100644 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -33,6 +33,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Pass.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Constants.h" @@ -43,6 +44,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "RSProfiling.h" #include <set> @@ -197,8 +200,8 @@ GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval) : T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; - Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter", &M); + Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, + Init, "RandomSteeringCounter"); } GlobalRandomCounter::~GlobalRandomCounter() {} @@ -211,8 +214,9 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { //decrement counter LoadInst* l = new LoadInst(Counter, "counter", t); - ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), - "countercc", t); + ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, + ConstantInt::get(T, 0), + "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); @@ -221,7 +225,8 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { //reset counter BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), + "reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); @@ -234,8 +239,8 @@ GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, : AI(0), T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; - Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter", &M); + Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, + Init, "RandomSteeringCounter"); } GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} @@ -283,8 +288,9 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { //decrement counter LoadInst* l = new LoadInst(AI, "counter", t); - ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), - "countercc", t); + ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, + ConstantInt::get(T, 0), + "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); @@ -293,7 +299,8 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { //reset counter BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), + "reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); @@ -315,12 +322,13 @@ void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { CallInst* c = CallInst::Create(F, "rdcc", t); BinaryOperator* b = - BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm), + BinaryOperator::CreateAnd(c, + ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm), "mrdcc", t); - ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b, - ConstantInt::get(Type::Int64Ty, 0), - "mrdccc", t); + ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, + ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), + "mrdccc"); t->setCondition(s); } @@ -345,16 +353,16 @@ void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNu // Create the getelementptr constant expression std::vector<Constant*> Indices(2); - Indices[0] = Constant::getNullValue(Type::Int32Ty); - Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); - Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, + Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); + Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); + Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], 2); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos); profcode.insert(OldVal); Value *NewVal = BinaryOperator::CreateAdd(OldVal, - ConstantInt::get(Type::Int32Ty, 1), + ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1), "NewCounter", InsertPos); profcode.insert(NewVal); profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); @@ -377,7 +385,8 @@ Value* ProfilerRS::Translate(Value* v) { if (bb == &bb->getParent()->getEntryBlock()) TransCache[bb] = bb; //don't translate entry block else - TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(), + TransCache[bb] = BasicBlock::Create(v->getContext(), + "dup_" + bb->getName(), bb->getParent(), NULL); return TransCache[bb]; } else if (Instruction* i = dyn_cast<Instruction>(v)) { @@ -401,7 +410,7 @@ Value* ProfilerRS::Translate(Value* v) { TransCache[v] = v; return v; } - assert(0 && "Value not handled"); + llvm_unreachable("Value not handled"); return 0; } @@ -466,16 +475,16 @@ void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) //a: Function::iterator BBN = src; ++BBN; - BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN); + BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN); //ChoicePoints.insert(bbC); BBN = cast<BasicBlock>(Translate(src)); - BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN); + BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN); ChoicePoints.insert(bbCp); //b: BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC); BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)), - ConstantInt::get(Type::Int1Ty, true), bbCp); + ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp); //c: { TerminatorInst* iB = src->getTerminator(); @@ -531,9 +540,8 @@ bool ProfilerRS::runOnFunction(Function& F) { TerminatorInst* T = F.getEntryBlock().getTerminator(); ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0), cast<BasicBlock>( - Translate(T->getSuccessor(0))), - ConstantInt::get(Type::Int1Ty, - true))); + Translate(T->getSuccessor(0))), + ConstantInt::get(Type::getInt1Ty(F.getContext()), true))); //do whatever is needed now that the function is duplicated c->PrepFunction(&F); @@ -556,10 +564,12 @@ bool ProfilerRS::runOnFunction(Function& F) { bool ProfilerRS::doInitialization(Module &M) { switch (RandomMethod) { case GBV: - c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1); + c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()), + (1 << 14) - 1); break; case GBVO: - c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1); + c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()), + (1 << 14) - 1); break; case HOSTCC: c = new CycleCounter(M, (1 << 14) - 1); @@ -639,7 +649,7 @@ static void getBackEdges(Function& F, T& BackEdges) { std::map<BasicBlock*, int> finish; int time = 0; recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); - DOUT << F.getName() << " " << BackEdges.size() << "\n"; + DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n"); } diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 5fe1eeb..025d02a 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello include $(LEVEL)/Makefile.config # No support for plugins on windows targets -ifeq ($(OS), $(filter $(OS), Cygwin MingW)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) endif diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 9c55f66..37f383f 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -21,19 +21,17 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" - using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed"); namespace { - struct VISIBILITY_HIDDEN ADCE : public FunctionPass { + struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid ADCE() : FunctionPass(&ID) {} diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index fb9b880..54533f5 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -31,7 +31,6 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/Statistic.h" #include "llvm/Transforms/Scalar.h" #include <set> @@ -40,7 +39,7 @@ using namespace llvm; STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { - struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass { + struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid BlockPlacement() : FunctionPass(&ID) {} @@ -127,13 +126,13 @@ void BlockPlacement::PlaceBlocks(BasicBlock *BB) { /*empty*/; if (SI == E) return; // No more successors to place. - unsigned MaxExecutionCount = PI->getExecutionCount(*SI); + double MaxExecutionCount = PI->getExecutionCount(*SI); BasicBlock *MaxSuccessor = *SI; // Scan for more frequently executed successors for (; SI != E; ++SI) if (!PlacedBlocks.count(*SI)) { - unsigned Count = PI->getExecutionCount(*SI); + double Count = PI->getExecutionCount(*SI); if (Count > MaxExecutionCount || // Prefer to not disturb the code. (Count == MaxExecutionCount && *SI == &*InsertPos)) { diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 8a8f83f..cbeed4c 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,13 +1,13 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp BasicBlockPlacement.cpp + CodeGenLICM.cpp CodeGenPrepare.cpp CondPropagate.cpp ConstantProp.cpp DCE.cpp DeadStoreElimination.cpp GVN.cpp - GVNPRE.cpp IndVarSimplify.cpp InstructionCombining.cpp JumpThreading.cpp @@ -19,7 +19,6 @@ add_llvm_library(LLVMScalarOpts LoopUnroll.cpp LoopUnswitch.cpp MemCpyOptimizer.cpp - PredicateSimplifier.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp new file mode 100644 index 0000000..10f950e --- /dev/null +++ b/lib/Transforms/Scalar/CodeGenLICM.cpp @@ -0,0 +1,112 @@ +//===- CodeGenLICM.cpp - LICM a function for code generation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This function performs late LICM, hoisting constants out of loops that +// are not valid immediates. It should not be followed by instcombine, +// because instcombine would quickly stuff the constants back into the loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-licm" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +using namespace llvm; + +namespace { + class CodeGenLICM : public LoopPass { + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenLICM() : LoopPass(&ID) {} + }; +} + +char CodeGenLICM::ID = 0; +static RegisterPass<CodeGenLICM> X("codegen-licm", + "hoist constants out of loops"); + +Pass *llvm::createCodeGenLICMPass() { + return new CodeGenLICM(); +} + +bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) { + bool Changed = false; + + // Only visit outermost loops. + if (L->getParentLoop()) return Changed; + + Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator(); + DenseMap<Constant *, BitCastInst *> HoistedConstants; + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE; ++BBI) { + Instruction *I = BBI; + // TODO: For now, skip all intrinsic instructions, because some of them + // can require their operands to be constants, and we don't want to + // break that. + if (isa<IntrinsicInst>(I)) + continue; + // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out. + if (BinaryOperator::isFNeg(I) || + BinaryOperator::isNeg(I) || + BinaryOperator::isNot(I)) + continue; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + // Don't hoist out switch case constants. + if (isa<SwitchInst>(I) && i == 1) + break; + // Don't hoist out shuffle masks. + if (isa<ShuffleVectorInst>(I) && i == 2) + break; + Value *Op = I->getOperand(i); + Constant *C = dyn_cast<Constant>(Op); + if (!C) continue; + // TODO: Ask the target which constants are legal. This would allow + // us to add support for hoisting ConstantInts and GlobalValues too. + if (isa<ConstantFP>(C) || + isa<ConstantVector>(C) || + isa<ConstantAggregateZero>(C)) { + BitCastInst *&BC = HoistedConstants[C]; + if (!BC) + BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm); + I->setOperand(i, BC); + Changed = true; + } + } + } + } + + return Changed; +} + +void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const { + // This pass preserves just about everything. List some popular things here. + AU.setPreservesCFG(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<LoopInfo>(); + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("lda"); + AU.addPreserved("live-values"); + + // Hoisting requires a loop preheader. + AU.addRequiredID(LoopSimplifyID); +} diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 85e9243..a3e3fea 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -23,10 +23,9 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -35,10 +34,10 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -46,10 +45,11 @@ static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", cl::init(false), cl::Hidden); namespace { - class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass { + class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; + ProfileInfo *PI; /// BackEdges - Keep a set of all the loop back edges. /// @@ -60,6 +60,10 @@ namespace { : FunctionPass(&ID), TLI(tli) {} bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<ProfileInfo>(); + } + private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -95,6 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) { bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + PI = getAnalysisIfAvailable<ProfileInfo>(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -232,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); - DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; + DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. @@ -241,12 +246,12 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB); + MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); - DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; + DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } @@ -283,9 +288,13 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); + if (PI) { + PI->replaceAllUses(BB, DestBB); + PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); + } BB->eraseFromParent(); - DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; + DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); } @@ -358,6 +367,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, // If we found a workable predecessor, change TI to branch to Succ. if (FoundMatch) { + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PI) + PI->splitEdge(TIBB, Dest, Pred); Dest->removePredecessor(TIBB); TI->setSuccessor(SuccNum, Pred); return; @@ -410,8 +422,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If this is a noop copy, - MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(CI->getType()); + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); // This is an fp<->int conversion? if (SrcVT.isInteger() != DstVT.isInteger()) @@ -424,10 +436,10 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) - SrcVT = TLI.getTypeToTransformTo(SrcVT); - if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) - DstVT = TLI.getTypeToTransformTo(DstVT); + if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. if (SrcVT != DstVT) @@ -520,7 +532,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) { BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); InsertedCmp = - CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), + CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); MadeChange = true; } @@ -577,7 +590,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If all the instructions matched are already in this BB, don't do anything. if (!AnyNonLocal) { - DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n"); + DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -592,14 +605,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // computation. Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { - DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); } else { - DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); - const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(); + DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + const Type *IntPtrTy = + TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; // Start with the scale value. @@ -616,7 +630,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy, + V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), "sunkaddr", InsertPt); Result = V; @@ -648,7 +662,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Add in the Base Offset if present. if (AddrMode.BaseOffs) { - Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs); + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); else @@ -656,7 +670,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } if (Result == 0) - SunkAddr = Context->getNullValue(Addr->getType()); + SunkAddr = Constant::getNullValue(Addr->getType()); else SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); } @@ -858,18 +872,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { } else if (CallInst *CI = dyn_cast<CallInst>(I)) { // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. - if (TLI && isa<InlineAsm>(CI->getCalledValue())) - if (const TargetAsmInfo *TAI = - TLI->getTargetMachine().getTargetAsmInfo()) { - if (TAI->ExpandInlineAsm(CI)) { - BBI = BB.begin(); - // Avoid processing instructions out of order, which could cause - // reuse before a value is defined. - SunkAddrs.clear(); - } else - // Sink address computing for memory operands into the block. - MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); - } + if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + BBI = BB.begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + } else + // Sink address computing for memory operands into the block. + MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); + } } } diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp index c85d031..5b573f4 100644 --- a/lib/Transforms/Scalar/CondPropagate.cpp +++ b/lib/Transforms/Scalar/CondPropagate.cpp @@ -14,26 +14,21 @@ #define DEBUG_TYPE "condprop" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Type.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" using namespace llvm; STATISTIC(NumBrThread, "Number of CFG edges threaded through branches"); STATISTIC(NumSwThread, "Number of CFG edges threaded through switches"); namespace { - struct VISIBILITY_HIDDEN CondProp : public FunctionPass { + struct CondProp : public FunctionPass { static char ID; // Pass identification, replacement for typeid CondProp() : FunctionPass(&ID) {} @@ -124,7 +119,7 @@ void CondProp::SimplifyBlock(BasicBlock *BB) { // Succ is now dead, but we cannot delete it without potentially // invalidating iterators elsewhere. Just insert an unreachable // instruction in it and delete this block later on. - new UnreachableInst(Succ); + new UnreachableInst(BB->getContext(), Succ); DeadBlocks.push_back(Succ); MadeChange = true; } @@ -196,8 +191,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) { if (&*BBI != SI) return; - bool RemovedPreds = false; - // Ok, we have this really simple case, walk the PHI operands, looking for // constants. Walk from the end to remove operands from the end when // possible, and to avoid invalidating "i". @@ -209,7 +202,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) { RevectorBlockTo(PN->getIncomingBlock(i-1), SI->getSuccessor(DestCase)); ++NumSwThread; - RemovedPreds = true; // If there were two predecessors before this simplification, or if the // PHI node contained all the same value except for the one we just diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index b933488..4fee327 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,7 +24,6 @@ #include "llvm/Constant.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include <set> @@ -33,7 +32,7 @@ using namespace llvm; STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { - struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass { + struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid ConstantPropagation() : FunctionPass(&ID) {} @@ -67,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, F.getContext())) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8bb504c..39940c3 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -21,7 +21,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include <set> @@ -34,7 +33,7 @@ namespace { //===--------------------------------------------------------------------===// // DeadInstElimination pass implementation // - struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass { + struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid DeadInstElimination() : BasicBlockPass(&ID) {} virtual bool runOnBasicBlock(BasicBlock &BB) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index b923c92..a7b3e75 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,14 +29,15 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); namespace { - struct VISIBILITY_HIDDEN DSE : public FunctionPass { + struct DSE : public FunctionPass { + TargetData *TD; + static char ID; // Pass identification, replacement for typeid DSE() : FunctionPass(&ID) {} @@ -62,7 +63,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<DominatorTree>(); - AU.addRequired<TargetData>(); AU.addRequired<AliasAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addPreserved<DominatorTree>(); @@ -79,15 +79,15 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } bool DSE::runOnBasicBlock(BasicBlock &BB) { MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); - TargetData &TD = getAnalysis<TargetData>(); + TD = getAnalysisIfAvailable<TargetData>(); bool MadeChange = false; - // Do a top-down walk on the BB + // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; - // If we find a store or a free, get it's memory dependence. + // If we find a store or a free, get its memory dependence. if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst)) continue; @@ -117,13 +117,17 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If this is a store-store dependence, then the previous store is dead so // long as this store is at least as big as it. if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst())) - if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <= - TD.getTypeStoreSize(SI->getOperand(0)->getType())) { + if (TD && + TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <= + TD->getTypeStoreSize(SI->getOperand(0)->getType())) { // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepStore); NumFastStores++; MadeChange = true; - + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; if (BBI != BB.begin()) --BBI; continue; @@ -134,8 +138,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && SI->getOperand(0) == DepLoad) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(BBI); + DeleteDeadInstruction(SI); - if (BBI != BB.begin()) + + if (NextInst == 0) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; NumFastStores++; MadeChange = true; @@ -181,7 +192,6 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) { /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { - TargetData &TD = getAnalysis<TargetData>(); AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); bool MadeChange = false; @@ -302,14 +312,16 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Get size information for the alloca unsigned pointerSize = ~0U; - if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { - if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD.getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast<PointerType>( - cast<Argument>(*I)->getType()); - pointerSize = TD.getTypeAllocSize(PT->getElementType()); + if (TD) { + if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { + if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD->getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast<PointerType>( + cast<Argument>(*I)->getType()); + pointerSize = TD->getTypeAllocSize(PT->getElementType()); + } } // See if the call site touches it @@ -357,7 +369,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) { bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, BasicBlock::iterator &BBI, SmallPtrSet<Value*, 64>& deadPointers) { - TargetData &TD = getAnalysis<TargetData>(); AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // If the kill pointer can be easily reduced to an alloca, @@ -379,13 +390,15 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, E = deadPointers.end(); I != E; ++I) { // Get size information for the alloca. unsigned pointerSize = ~0U; - if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { - if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD.getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType()); - pointerSize = TD.getTypeAllocSize(PT->getElementType()); + if (TD) { + if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { + if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD->getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType()); + pointerSize = TD->getTypeAllocSize(PT->getElementType()); + } } // See if this pointer could alias it diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f4fe15e..2ed4a63 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -23,6 +23,7 @@ #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -32,13 +33,18 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include <cstdio> using namespace llvm; @@ -60,17 +66,17 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); /// as an efficient mechanism to determine the expression-wise equivalence of /// two values. namespace { - struct VISIBILITY_HIDDEN Expression { + struct Expression { enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, + FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, EMPTY, TOMBSTONE }; @@ -80,11 +86,11 @@ namespace { uint32_t secondVN; uint32_t thirdVN; SmallVector<uint32_t, 4> varargs; - Value* function; - + Value *function; + Expression() { } Expression(ExpressionOpcode o) : opcode(o) { } - + bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; @@ -103,30 +109,30 @@ namespace { else { if (varargs.size() != other.varargs.size()) return false; - + for (size_t i = 0; i < varargs.size(); ++i) if (varargs[i] != other.varargs[i]) return false; - + return true; } } - + bool operator!=(const Expression &other) const { return !(*this == other); } }; - - class VISIBILITY_HIDDEN ValueTable { + + class ValueTable { private: DenseMap<Value*, uint32_t> valueNumbering; DenseMap<Expression, uint32_t> expressionNumbering; AliasAnalysis* AA; MemoryDependenceAnalysis* MD; DominatorTree* DT; - + uint32_t nextValueNumber; - + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); Expression::ExpressionOpcode getOpcode(CmpInst* C); Expression::ExpressionOpcode getOpcode(CastInst* C); @@ -142,11 +148,11 @@ namespace { Expression create_expression(Constant* C); public: ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value* V); - uint32_t lookup(Value* V) const; - void add(Value* V, uint32_t num); + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); void clear(); - void erase(Value* v); + void erase(Value *v); unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } @@ -162,30 +168,30 @@ template <> struct DenseMapInfo<Expression> { static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); } - + static inline Expression getTombstoneKey() { return Expression(Expression::TOMBSTONE); } - + static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - + hash = e.firstVN + hash * 37; hash = e.secondVN + hash * 37; hash = e.thirdVN + hash * 37; - + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ (unsigned)((uintptr_t)e.type >> 9)) + hash * 37; - + for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - + hash = ((unsigned)((uintptr_t)e.function >> 4) ^ (unsigned)((uintptr_t)e.function >> 9)) + hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { @@ -201,7 +207,7 @@ template <> struct DenseMapInfo<Expression> { Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { switch(BO->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Binary operator with unknown opcode?"); + llvm_unreachable("Binary operator with unknown opcode?"); case Instruction::Add: return Expression::ADD; case Instruction::FAdd: return Expression::FADD; case Instruction::Sub: return Expression::SUB; @@ -224,10 +230,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { } Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) { + if (isa<ICmpInst>(C)) { switch (C->getPredicate()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); + llvm_unreachable("Comparison with unknown predicate?"); case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; case ICmpInst::ICMP_NE: return Expression::ICMPNE; case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; @@ -239,32 +245,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; } - } - assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare"); - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } else { + switch (C->getPredicate()) { + default: // THIS SHOULD NEVER HAPPEN + llvm_unreachable("Comparison with unknown predicate?"); + case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } } } Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { switch(C->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Cast operator with unknown opcode?"); + llvm_unreachable("Cast operator with unknown opcode?"); case Instruction::Trunc: return Expression::TRUNC; case Instruction::ZExt: return Expression::ZEXT; case Instruction::SExt: return Expression::SEXT; @@ -282,126 +288,126 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { Expression ValueTable::create_expression(CallInst* C) { Expression e; - + e.type = C->getType(); e.firstVN = 0; e.secondVN = 0; e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - + e.firstVN = lookup_or_add(BO->getOperand(0)); e.secondVN = lookup_or_add(BO->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = BO->getType(); e.opcode = getOpcode(BO); - + return e; } Expression ValueTable::create_expression(CmpInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = lookup_or_add(C->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(CastInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - + e.firstVN = lookup_or_add(S->getOperand(0)); e.secondVN = lookup_or_add(S->getOperand(1)); e.thirdVN = lookup_or_add(S->getOperand(2)); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; - + return e; } Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - + e.firstVN = lookup_or_add(E->getOperand(0)); e.secondVN = lookup_or_add(E->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; - + return e; } Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getOperand(0)); e.secondVN = lookup_or_add(I->getOperand(1)); e.thirdVN = lookup_or_add(I->getOperand(2)); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; - + return e; } Expression ValueTable::create_expression(SelectInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getCondition()); e.secondVN = lookup_or_add(I->getTrueValue()); e.thirdVN = lookup_or_add(I->getFalseValue()); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; - + return e; } Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - + e.firstVN = lookup_or_add(G->getPointerOperand()); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; - + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } @@ -410,21 +416,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { //===----------------------------------------------------------------------===// /// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value* V, uint32_t num) { +void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value* V) { +uint32_t ValueTable::lookup_or_add(Value *V) { DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; - + if (CallInst* C = dyn_cast<CallInst>(V)) { if (AA->doesNotAccessMemory(C)) { Expression e = create_expression(C); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -432,20 +438,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (AA->onlyReadsMemory(C)) { Expression e = create_expression(C); - + if (expressionNumbering.find(e) == expressionNumbering.end()) { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + MemDepResult local_dep = MD->getDependency(C); - + if (!local_dep.isDef() && !local_dep.isNonLocal()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -453,12 +459,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { if (local_dep.isDef()) { CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - + if (local_cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); @@ -467,19 +473,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(local_cdep); valueNumbering.insert(std::make_pair(V, v)); return v; } // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: call/call dependencies for readonly calls should return def, not // clobber! Move the checking logic to MemDep! CallInst* cdep = 0; - + // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { @@ -494,23 +500,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) { cdep = 0; break; } - + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); // FIXME: All duplicated with non-local case. if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ cdep = NonLocalDepCall; continue; } - + cdep = 0; break; } - + if (!cdep) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + if (cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -523,18 +529,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(cdep); valueNumbering.insert(std::make_pair(V, v)); return v; - + } else { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { Expression e = create_expression(BO); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -542,12 +548,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { Expression e = create_expression(C); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -555,12 +561,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -568,12 +574,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -581,12 +587,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -594,12 +600,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -607,12 +613,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CastInst* U = dyn_cast<CastInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -620,12 +626,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { Expression e = create_expression(U); - + DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -633,7 +639,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else { @@ -644,7 +650,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { /// lookup - Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t ValueTable::lookup(Value* V) const { +uint32_t ValueTable::lookup(Value *V) const { DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); assert(VI != valueNumbering.end() && "Value not numbered?"); return VI->second; @@ -658,7 +664,7 @@ void ValueTable::clear() { } /// erase - Remove a value from the value numbering -void ValueTable::erase(Value* V) { +void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -676,17 +682,17 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { - struct VISIBILITY_HIDDEN ValueNumberScope { + struct ValueNumberScope { ValueNumberScope* parent; DenseMap<uint32_t, Value*> table; - + ValueNumberScope(ValueNumberScope* p) : parent(p) { } }; } namespace { - class VISIBILITY_HIDDEN GVN : public FunctionPass { + class GVN : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -698,45 +704,35 @@ namespace { ValueTable VN; DenseMap<BasicBlock*, ValueNumberScope*> localAvail; - - typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; - PhiMapType phiMap; - - + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); - + AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } - + // Helper fuctions // FIXME: eliminate or document these better bool processLoad(LoadInst* L, SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction* I, + bool processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase); bool processNonLocalLoad(LoadInst* L, SmallVectorImpl<Instruction*> &toErase); - bool processBlock(BasicBlock* BB); - Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap<BasicBlock*, Value*> &Phis, - bool top_level = false); + bool processBlock(BasicBlock *BB); void dump(DenseMap<uint32_t, Value*>& d); bool iterateOnFunction(Function &F); - Value* CollapsePhi(PHINode* p); - bool isSafeReplacement(PHINode* p, Instruction* inst); + Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value* lookupNumber(BasicBlock* BB, uint32_t num); - bool mergeBlockIntoPredecessor(BasicBlock* BB); - Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); + Value *lookupNumber(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; }; - + char GVN::ID = 0; } @@ -756,107 +752,31 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { printf("}\n"); } -Value* GVN::CollapsePhi(PHINode* p) { - Value* constVal = p->hasConstantValue(); - if (!constVal) return 0; - - Instruction* inst = dyn_cast<Instruction>(constVal); - if (!inst) - return constVal; - - if (DT->dominates(inst, p)) - if (isSafeReplacement(p, inst)) - return inst; - return 0; -} - -bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { +static bool isSafeReplacement(PHINode* p, Instruction *inst) { if (!isa<PHINode>(inst)) return true; - + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) if (PHINode* use_phi = dyn_cast<PHINode>(UI)) if (use_phi->getParent() == inst->getParent()) return false; - + return true; } -/// GetValueForBlock - Get the value to use within the specified basic block. -/// available values are in Phis. -Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap<BasicBlock*, Value*> &Phis, - bool top_level) { - - // If we have already computed this value, return the previously computed val. - DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); - if (V != Phis.end() && !top_level) return V->second; - - // If the block is unreachable, just return undef, since this path - // can't actually occur at runtime. - if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = Context->getUndef(orig->getType()); - - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - Value *ret = GetValueForBlock(Pred, orig, Phis); - Phis[BB] = ret; - return ret; - } +Value *GVN::CollapsePhi(PHINode *PN) { + Value *ConstVal = PN->hasConstantValue(DT); + if (!ConstVal) return 0; - // Get the number of predecessors of this block so we can reserve space later. - // If there is already a PHI in it, use the #preds from it, otherwise count. - // Getting it from the PHI is constant time. - unsigned NumPreds; - if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin())) - NumPreds = ExistingPN->getNumIncomingValues(); - else - NumPreds = std::distance(pred_begin(BB), pred_end(BB)); - - // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so - // now, then get values to fill in the incoming values for the PHI. - PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", - BB->begin()); - PN->reserveOperandSpace(NumPreds); - - Phis.insert(std::make_pair(BB, PN)); - - // Fill in the incoming values for the block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Value* val = GetValueForBlock(*PI, orig, Phis); - PN->addIncoming(val, *PI); - } - - VN.getAliasAnalysis()->copyValue(orig, PN); - - // Attempt to collapse PHI nodes that are trivially redundant - Value* v = CollapsePhi(PN); - if (!v) { - // Cache our phi construction results - if (LoadInst* L = dyn_cast<LoadInst>(orig)) - phiMap[L->getPointerOperand()].insert(PN); - else - phiMap[orig].insert(PN); - - return PN; - } - - PN->replaceAllUsesWith(v); - if (isa<PointerType>(v->getType())) - MD->invalidateCachedPointerInfo(v); - - for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), - E = Phis.end(); I != E; ++I) - if (I->second == PN) - I->second = v; - - DEBUG(cerr << "GVN removed: " << *PN); - MD->removeInstruction(PN); - PN->eraseFromParent(); - DEBUG(verifyRemoved(PN)); - - Phis[BB] = v; - return v; + Instruction *Inst = dyn_cast<Instruction>(ConstVal); + if (!Inst) + return ConstVal; + + if (DT->dominates(Inst, PN)) + if (isSafeReplacement(PN, Inst)) + return Inst; + return 0; } /// IsValueFullyAvailableInBlock - Return true if we can prove that the value @@ -869,11 +789,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. -static bool IsValueFullyAvailableInBlock(BasicBlock *BB, +static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap<BasicBlock*, char> &FullyAvailableBlocks) { // Optimistically assume that the block is fully available and check to see // if we already know about this block in one lookup. - std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = + std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = FullyAvailableBlocks.insert(std::make_pair(BB, 2)); // If the entry already existed for this block, return the precomputed value. @@ -884,29 +804,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, IV.first->second = 3; return IV.first->second != 0; } - + // Otherwise, see if it is fully available in all predecessors. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - + // If this block has no predecessors, it isn't live-in here. if (PI == PE) goto SpeculationFailure; - + for (; PI != PE; ++PI) // If the value isn't fully available in one of our predecessors, then it // isn't fully available in this block either. Undo our previous // optimistic assumption and bail out. if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) goto SpeculationFailure; - + return true; - + // SpeculationFailure - If we get here, we found out that this is not, after // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: char &BBVal = FullyAvailableBlocks[BB]; - + // If we didn't speculate on this, just return with it set to false. if (BBVal == 2) { BBVal = 0; @@ -918,7 +838,7 @@ SpeculationFailure: // 0 if set to one. SmallVector<BasicBlock*, 32> BBWorklist; BBWorklist.push_back(BB); - + while (!BBWorklist.empty()) { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not @@ -928,24 +848,372 @@ SpeculationFailure: // Mark as unavailable. EntryVal = 0; - + for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); } - + return false; } + +/// CanCoerceMustAliasedValueToLoad - Return true if +/// CoerceAvailableValueToLoadType will succeed. +static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, + const Type *LoadTy, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) || + isa<StructType>(StoredVal->getType()) || + isa<ArrayType>(StoredVal->getType())) + return false; + + // The store has to be at least as big as the load. + if (TD.getTypeSizeInBits(StoredVal->getType()) < + TD.getTypeSizeInBits(LoadTy)) + return false; + + return true; +} + + +/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace and +/// InsertPt is the place to insert new instructions. +/// +/// If we can't do it, return null. +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, + const Type *LoadedTy, + Instruction *InsertPt, + const TargetData &TD) { + if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) + return 0; + + const Type *StoredValTy = StoredVal->getType(); + + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. + if (StoreSize == LoadSize) { + if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) { + // Pointer to Pointer -> use bitcast. + return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); + } + + // Convert source pointers to integers, which can be bitcast. + if (isa<PointerType>(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + const Type *TypeToCastTo = LoadedTy; + if (isa<PointerType>(TypeToCastTo)) + TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); + + if (StoredValTy != TypeToCastTo) + StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); + + // Cast to pointer if the load needs a pointer type. + if (isa<PointerType>(LoadedTy)) + StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); + + return StoredVal; + } + + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (isa<PointerType>(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!isa<IntegerType>(StoredValTy)) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); + StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (TD.isBigEndian()) { + Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); + StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); + } + + // Truncate the integer to the right size now. + const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); + + if (LoadedTy == NewIntTy) + return StoredVal; + + // If the result is a pointer, inttoptr. + if (isa<PointerType>(LoadedTy)) + return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); + + // Otherwise, bitcast. + return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); +} + +/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can +/// be expressed as a base pointer plus a constant offset. Return the base and +/// offset to the caller. +static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast<Operator>(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast<ConstantInt>(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. This returns -1 if we have to +/// give up, or a byte number in the stored value of the piece that feeds the +/// load. +static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) || + isa<StructType>(DepSI->getOperand(0)->getType()) || + isa<ArrayType>(DepSI->getOperand(0)->getType())) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *LoadBase = + GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. + if (LoadOffset == StoreOffset) { +#if 0 + errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then + // remove this check, as it is duplicated with what we have below. + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + + if ((StoreSize & 7) | (LoadSize & 7)) + return -1; + StoreSize >>= 3; // Convert to bytes. + LoadSize >>= 3; + + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) { + isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; + } else { + isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; + } + if (isAAFailure) { +#if 0 + errs() << "STORE LOAD DEP WITH COMMON BASE:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset+StoreSize < LoadOffset+LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset-StoreOffset; +} + + +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, + const Type *LoadTy, + Instruction *InsertPt, const TargetData &TD){ + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. + if (isa<PointerType>(SrcVal->getType())) + SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + if (!isa<IntegerType>(SrcVal->getType())) + SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp", InsertPt); + + // Shift the bits to the least significant depending on endianness. + unsigned ShiftAmt; + if (TD.isLittleEndian()) { + ShiftAmt = Offset*8; + } else { + ShiftAmt = (StoreSize-LoadSize-Offset)*8; + } + + if (ShiftAmt) + SrcVal = BinaryOperator::CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + + if (LoadSize != StoreSize) + SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp", InsertPt); + + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); +} + +struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + /// V - The value that is live out of the block. + Value *V; + /// Offset - The byte offset in V that is interesting for the load query. + unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.V = V; + Res.Offset = Offset; + return Res; + } +}; + +/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, +/// construct SSA form, allowing us to eliminate LI. This returns the value +/// that should be used at LI's definition site. +static Value *ConstructSSAForLoadSet(LoadInst *LI, + SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, + const TargetData *TD, + AliasAnalysis *AA) { + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSAUpdate(&NewPHIs); + SSAUpdate.Initialize(LI); + + const Type *LoadTy = LI->getType(); + + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + BasicBlock *BB = ValuesPerBlock[i].BB; + Value *AvailableVal = ValuesPerBlock[i].V; + unsigned Offset = ValuesPerBlock[i].Offset; + + if (SSAUpdate.HasValueForBlock(BB)) + continue; + + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + if (Offset) { + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + + SSAUpdate.AddAvailableValue(BB, AvailableVal); + } + + // Perform PHI construction. + Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); + + // If new PHI nodes were created, notify alias analysis. + if (isa<PointerType>(V->getType())) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + AA->copyValue(LI, NewPHIs[i]); + + return V; +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. - SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; + SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); - + //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + // << Deps.size() << *LI << '\n'); + // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing // it will be too expensive. @@ -956,106 +1224,124 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // clobber in the current block. Reject this early. if (Deps.size() == 1 && Deps[0].second.isClobber()) { DEBUG( - DOUT << "GVN: non-local load "; - WriteAsOperand(*DOUT.stream(), LI); - DOUT << " is clobbered by " << *Deps[0].second.getInst(); + errs() << "GVN: non-local load "; + WriteAsOperand(errs(), LI); + errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; ); return false; } - + // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). - SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; + SmallVector<AvailableValueInBlock, 16> ValuesPerBlock; SmallVector<BasicBlock*, 16> UnavailableBlocks; + + const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].first; MemDepResult DepInfo = Deps[i].second; - + if (DepInfo.isClobber()) { + // If the dependence is to a store that writes to a superset of the bits + // read by the load, we can extract the bits we need for the load from the + // stored value. + if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + if (TD) { + int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + DepSI->getOperand(0), + Offset)); + continue; + } + } + } + + // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } - + Instruction *DepInst = DepInfo.getInst(); - + // Loading the allocation -> undef. - if (isa<AllocationInst>(DepInst)) { - ValuesPerBlock.push_back(std::make_pair(DepBB, - Context->getUndef(LI->getType()))); + if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + UndefValue::get(LI->getType()))); continue; } - - if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { - // Reject loads and stores that are to the same address but are of - // different types. - // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because - // of bitfield access, it would be interesting to optimize for it at some - // point. + + if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { + // Reject loads and stores that are to the same address but are of + // different types if we have to. if (S->getOperand(0)->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + LI->getType(), *TD)) { + UnavailableBlocks.push_back(DepBB); + continue; + } } - - ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); - - } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { + + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + S->getOperand(0))); + continue; + } + + if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { + // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ + UnavailableBlocks.push_back(DepBB); + continue; + } } - ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); - } else { - UnavailableBlocks.push_back(DepBB); + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); continue; } + + UnavailableBlocks.push_back(DepBB); + continue; } - + // If we have no predecessors that produce a known value for this load, exit // early. if (ValuesPerBlock.empty()) return false; - + // If all of the instructions we depend on produce a known value for this // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { - // Use cached PHI construction information from previous runs - SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; - // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? - for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); - I != E; ++I) { - if ((*I)->getParent() == LI->getParent()) { - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); - LI->replaceAllUsesWith(*I); - if (isa<PointerType>((*I)->getType())) - MD->invalidateCachedPointerInfo(*I); - toErase.push_back(LI); - NumGVNLoad++; - return true; - } - - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - } - - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); + DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - DenseMap<BasicBlock*, Value*> BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - - if (isa<PHINode>(v)) - v->takeName(LI); - if (isa<PointerType>(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + + if (isa<PHINode>(V)) + V->takeName(LI); + if (isa<PointerType>(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumGVNLoad++; return true; } - + if (!EnablePRE || !EnableLoadPRE) return false; @@ -1066,7 +1352,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // prefer to not increase code size. As such, we only do this when we know // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - + SmallPtrSet<BasicBlock *, 4> Blockers; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); @@ -1090,28 +1376,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (TmpBB->getTerminator()->getNumSuccessors() != 1) allSingleSucc = false; } - + assert(TmpBB); LoadBB = TmpBB; - + // If we have a repl set with LI itself in it, this means we have a loop where // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].second == LI) + if (ValuesPerBlock[i].V == LI) return false; - + if (isSinglePred) { bool isHot = false; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second)) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V)) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (DT->dominates(LI, I)) { + isHot = true; + break; + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1128,7 +1414,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DenseMap<BasicBlock*, char> FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].first] = true; + FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; @@ -1136,33 +1422,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI, PI != E; ++PI) { if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) continue; - + // If this load is not available in multiple predecessors, reject it. if (UnavailablePred && UnavailablePred != *PI) return false; UnavailablePred = *PI; } - + assert(UnavailablePred != 0 && "Fully available value should be eliminated above!"); - + // If the loaded pointer is PHI node defined in this block, do PHI translation // to get its value in the predecessor. Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); - + // Make sure the value is live in the predecessor. If it was defined by a // non-PHI instruction in this block, we don't know how to recompute it above. if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr)) if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { - DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " - << *LPInst << *LI << "\n"); + DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " + << *LPInst << '\n' << *LI << "\n"); return false; } - + // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI); + DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } @@ -1182,28 +1468,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. - DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); - + DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), UnavailablePred->getTerminator()); - - SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; - for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); - I != E; ++I) - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - - DenseMap<BasicBlock*, Value*> BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); - BlockReplValues[UnavailablePred] = NewLoad; - + + // Add the newly created load. + ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); + // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - if (isa<PHINode>(v)) - v->takeName(LI); - if (isa<PointerType>(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + if (isa<PHINode>(V)) + V->takeName(LI); + if (isa<PointerType>(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumPRELoad++; return true; @@ -1214,64 +1495,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI, bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { if (L->isVolatile()) return false; - - Value* pointer = L->getPointerOperand(); // ... to a pointer that has been loaded from before... - MemDepResult dep = MD->getDependency(L); - + MemDepResult Dep = MD->getDependency(L); + // If the value isn't available, don't do anything! - if (dep.isClobber()) { + if (Dep.isClobber()) { + // FIXME: We should handle memset/memcpy/memmove as dependent instructions + // to forward the value if available. + //if (isa<MemIntrinsic>(Dep.getInst())) + //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; + + // Check to see if we have something like this: + // store i32 123, i32* %P + // %A = bitcast i32* %P to i8* + // %B = gep i8* %A, i32 1 + // %C = load i8* %B + // + // We could do that by recognizing if the clobber instructions are obviously + // a common base + constant offset, and if the previous store (or memset) + // completely covers this load. This sort of thing can happen in bitfield + // access code. + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) + if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); + if (Offset != -1) { + Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); + DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa<PointerType>(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow - DOUT << "GVN: load "; - WriteAsOperand(*DOUT.stream(), L); - Instruction *I = dep.getInst(); - DOUT << " is clobbered by " << *I; + errs() << "GVN: load "; + WriteAsOperand(errs(), L); + Instruction *I = Dep.getInst(); + errs() << " is clobbered by " << *I << '\n'; ); return false; } // If it is defined in another block, try harder. - if (dep.isNonLocal()) + if (Dep.isNonLocal()) return processNonLocalLoad(L, toErase); - Instruction *DepInst = dep.getInst(); + Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! - if (DepSI->getPointerOperand()->getType() != pointer->getType()) - return false; + Value *StoredVal = DepSI->getOperand(0); + // The store and load are to a must-aliased pointer, but they may not + // actually have the same type. See if we know how to reuse the stored + // value (depending on its type). + const TargetData *TD = 0; + if (StoredVal->getType() != L->getType() && + (TD = getAnalysisIfAvailable<TargetData>())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + // Remove it! - L->replaceAllUsesWith(DepSI->getOperand(0)); - if (isa<PointerType>(DepSI->getOperand(0)->getType())) - MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); + L->replaceAllUsesWith(StoredVal); + if (isa<PointerType>(StoredVal->getType())) + MD->invalidateCachedPointerInfo(StoredVal); toErase.push_back(L); NumGVNLoad++; return true; } if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. - if (DepLI->getType() != L->getType()) - return false; + Value *AvailableVal = DepLI; + + // The loads are of a must-aliased pointer, but they may not actually have + // the same type. See if we know how to reuse the previously loaded value + // (depending on its type). + const TargetData *TD = 0; + if (DepLI->getType() != L->getType() && + (TD = getAnalysisIfAvailable<TargetData>())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } // Remove it! - L->replaceAllUsesWith(DepLI); + L->replaceAllUsesWith(AvailableVal); if (isa<PointerType>(DepLI->getType())) MD->invalidateCachedPointerInfo(DepLI); toErase.push_back(L); NumGVNLoad++; return true; } - + // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocationInst>(DepInst)) { - L->replaceAllUsesWith(Context->getUndef(L->getType())); + if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) { + L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; return true; @@ -1280,150 +1616,93 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { return false; } -Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { +Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); if (I == localAvail.end()) return 0; - - ValueNumberScope* locals = I->second; - - while (locals) { - DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num); - if (I != locals->table.end()) + + ValueNumberScope *Locals = I->second; + while (Locals) { + DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); + if (I != Locals->table.end()) return I->second; - else - locals = locals->parent; + Locals = Locals->parent; } - + return 0; } -/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination -/// by inheritance from the dominator fails, see if we can perform phi -/// construction to eliminate the redundancy. -Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { - BasicBlock* BaseBlock = orig->getParent(); - - SmallPtrSet<BasicBlock*, 4> Visited; - SmallVector<BasicBlock*, 8> Stack; - Stack.push_back(BaseBlock); - - DenseMap<BasicBlock*, Value*> Results; - - // Walk backwards through our predecessors, looking for instances of the - // value number we're looking for. Instances are recorded in the Results - // map, which is then used to perform phi construction. - while (!Stack.empty()) { - BasicBlock* Current = Stack.back(); - Stack.pop_back(); - - // If we've walked all the way to a proper dominator, then give up. Cases - // where the instance is in the dominator will have been caught by the fast - // path, and any cases that require phi construction further than this are - // probably not worth it anyways. Note that this is a SIGNIFICANT compile - // time improvement. - if (DT->properlyDominates(Current, orig->getParent())) return 0; - - DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA = - localAvail.find(Current); - if (LA == localAvail.end()) return 0; - DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno); - - if (V != LA->second->table.end()) { - // Found an instance, record it. - Results.insert(std::make_pair(Current, V->second)); - continue; - } - - // If we reach the beginning of the function, then give up. - if (pred_begin(Current) == pred_end(Current)) - return 0; - - for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); - PI != PE; ++PI) - if (Visited.insert(*PI)) - Stack.push_back(*PI); - } - - // If we didn't find instances, give up. Otherwise, perform phi construction. - if (Results.size() == 0) - return 0; - else - return GetValueForBlock(BaseBlock, orig, Results, true); -} /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase) { - if (LoadInst* L = dyn_cast<LoadInst>(I)) { - bool changed = processLoad(L, toErase); - - if (!changed) { - unsigned num = VN.lookup_or_add(L); - localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + bool Changed = processLoad(LI, toErase); + + if (!Changed) { + unsigned Num = VN.lookup_or_add(LI); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); } - - return changed; + + return Changed; } - - uint32_t nextNum = VN.getNextUnusedValueNumber(); - unsigned num = VN.lookup_or_add(I); - - if (BranchInst* BI = dyn_cast<BranchInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); + + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (!BI->isConditional() || isa<Constant>(BI->getCondition())) return false; - - Value* branchCond = BI->getCondition(); - uint32_t condVN = VN.lookup_or_add(branchCond); - - BasicBlock* trueSucc = BI->getSuccessor(0); - BasicBlock* falseSucc = BI->getSuccessor(1); - - if (trueSucc->getSinglePredecessor()) - localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue(); - if (falseSucc->getSinglePredecessor()) - localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse(); + + Value *BranchCond = BI->getCondition(); + uint32_t CondVN = VN.lookup_or_add(BranchCond); + + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + + if (TrueSucc->getSinglePredecessor()) + localAvail[TrueSucc]->table[CondVN] = + ConstantInt::getTrue(TrueSucc->getContext()); + if (FalseSucc->getSinglePredecessor()) + localAvail[FalseSucc]->table[CondVN] = + ConstantInt::getFalse(TrueSucc->getContext()); return false; - + // Allocations are always uniquely numbered, so we can save time and memory - // by fast failing them. + // by fast failing them. } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); return false; } - + // Collapse PHI nodes if (PHINode* p = dyn_cast<PHINode>(I)) { - Value* constVal = CollapsePhi(p); - + Value *constVal = CollapsePhi(p); + if (constVal) { - for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); - PI != PE; ++PI) - PI->second.erase(p); - p->replaceAllUsesWith(constVal); if (isa<PointerType>(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); - + toErase.push_back(p); } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! - } else if (num == nextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + } else if (Num == NextNum) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value* repl = lookupNumber(I->getParent(), num)) { + } else if (Value *repl = lookupNumber(I->getParent(), Num)) { // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); @@ -1432,21 +1711,10 @@ bool GVN::processInstruction(Instruction *I, toErase.push_back(I); return true; -#if 0 - // Perform slow-pathvalue-number based elimination with phi construction. - } else if (Value* repl = AttemptRedundancyElimination(I, num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (isa<PointerType>(repl->getType())) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; -#endif } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + return false; } @@ -1457,35 +1725,35 @@ bool GVN::runOnFunction(Function& F) { VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); - - bool changed = false; - bool shouldContinue = true; - + + bool Changed = false; + bool ShouldContinue = true; + // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock* BB = FI; + BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) NumGVNBlocks++; - - changed |= removedBlock; + + Changed |= removedBlock; } - + unsigned Iteration = 0; - - while (shouldContinue) { - DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); - shouldContinue = iterateOnFunction(F); - changed |= shouldContinue; + + while (ShouldContinue) { + DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); + ShouldContinue = iterateOnFunction(F); + Changed |= ShouldContinue; ++Iteration; } - + if (EnablePRE) { bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); - changed |= PREChanged; + Changed |= PREChanged; } } // FIXME: Should perform GVN again after PRE does something. PRE can move @@ -1495,27 +1763,27 @@ bool GVN::runOnFunction(Function& F) { cleanupGlobalSets(); - return changed; + return Changed; } -bool GVN::processBlock(BasicBlock* BB) { +bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and // incrementing BI before processing an instruction). SmallVector<Instruction*, 8> toErase; - bool changed_function = false; - + bool ChangedFunction = false; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - changed_function |= processInstruction(BI, toErase); + ChangedFunction |= processInstruction(BI, toErase); if (toErase.empty()) { ++BI; continue; } - + // If we need some instructions deleted, do it now. NumGVNInstr += toErase.size(); - + // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) @@ -1523,7 +1791,7 @@ bool GVN::processBlock(BasicBlock* BB) { for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { - DEBUG(cerr << "GVN removed: " << **I); + DEBUG(errs() << "GVN removed: " << **I << '\n'); MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); @@ -1535,8 +1803,8 @@ bool GVN::processBlock(BasicBlock* BB) { else ++BI; } - - return changed_function; + + return ChangedFunction; } /// performPRE - Perform a purely local form of PRE that looks for diamond @@ -1547,32 +1815,33 @@ bool GVN::performPRE(Function& F) { DenseMap<BasicBlock*, Value*> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock* CurrentBlock = *DI; - + BasicBlock *CurrentBlock = *DI; + // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; - + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; - if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) || - isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) || + if (isa<AllocationInst>(CurInst) || + isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || + CurInst->getType()->isVoidTy() || CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; - uint32_t valno = VN.lookup(CurInst); - + uint32_t ValNo = VN.lookup(CurInst); + // Look for the predecessors for PRE opportunities. We're // only trying to solve the basic diamond case, where // a value is computed in the successor and one predecessor, // but not the other. We also explicitly disallow cases // where the successor is its own predecessor, because they're // more complicated to get right. - unsigned numWith = 0; - unsigned numWithout = 0; - BasicBlock* PREPred = 0; + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = 0; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), @@ -1581,59 +1850,59 @@ bool GVN::performPRE(Function& F) { // own predecessor, on in blocks with predecessors // that are not reachable. if (*PI == CurrentBlock) { - numWithout = 2; + NumWithout = 2; break; } else if (!localAvail.count(*PI)) { - numWithout = 2; + NumWithout = 2; break; } - - DenseMap<uint32_t, Value*>::iterator predV = - localAvail[*PI]->table.find(valno); + + DenseMap<uint32_t, Value*>::iterator predV = + localAvail[*PI]->table.find(ValNo); if (predV == localAvail[*PI]->table.end()) { PREPred = *PI; - numWithout++; + NumWithout++; } else if (predV->second == CurInst) { - numWithout = 2; + NumWithout = 2; } else { predMap[*PI] = predV->second; - numWith++; + NumWith++; } } - + // Don't do PRE when it might increase code size, i.e. when // we would need to insert instructions in more than one pred. - if (numWithout != 1 || numWith == 0) + if (NumWithout != 1 || NumWith == 0) continue; - + // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. - unsigned succNum = 0; + unsigned SuccNum = 0; for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); i != e; ++i) if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { - succNum = i; + SuccNum = i; break; } - - if (isCriticalEdge(PREPred->getTerminator(), succNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); + + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; } - + // Instantiate the expression the in predecessor that lacked it. // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any // that weren't original present will have been instantiated earlier // in this loop. - Instruction* PREInstr = CurInst->clone(); + Instruction *PREInstr = CurInst->clone(); bool success = true; for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { Value *Op = PREInstr->getOperand(i); if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) continue; - + if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { @@ -1641,25 +1910,25 @@ bool GVN::performPRE(Function& F) { break; } } - + // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which + // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { delete PREInstr; DEBUG(verifyRemoved(PREInstr)); continue; } - + PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; - VN.add(PREInstr, valno); + VN.add(PREInstr, ValNo); NumGVNPRE++; - + // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); - + localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + // Create a PHI to make the value available in this block. PHINode* Phi = PHINode::Create(CurInst->getType(), CurInst->getName() + ".pre-phi", @@ -1667,27 +1936,27 @@ bool GVN::performPRE(Function& F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) Phi->addIncoming(predMap[*PI], *PI); - - VN.add(Phi, valno); - localAvail[CurrentBlock]->table[valno] = Phi; - + + VN.add(Phi, ValNo); + localAvail[CurrentBlock]->table[ValNo] = Phi; + CurInst->replaceAllUsesWith(Phi); if (isa<PointerType>(Phi->getType())) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); - - DEBUG(cerr << "GVN PRE removed: " << *CurInst); + + DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); Changed = true; } } - + for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator I = toSplit.begin(), E = toSplit.end(); I != E; ++I) SplitCriticalEdge(I->first, I->second, this); - + return Changed || toSplit.size(); } @@ -1705,25 +1974,24 @@ bool GVN::iterateOnFunction(Function &F) { } // Top-down walk of the dominator tree - bool changed = false; + bool Changed = false; #if 0 // Needed for value numbering with phi construction to work. ReversePostOrderTraversal<Function*> RPOT(&F); for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) - changed |= processBlock(*RI); + Changed |= processBlock(*RI); #else for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) - changed |= processBlock(DI->getBlock()); + Changed |= processBlock(DI->getBlock()); #endif - return changed; + return Changed; } void GVN::cleanupGlobalSets() { VN.clear(); - phiMap.clear(); for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.begin(), E = localAvail.end(); I != E; ++I) @@ -1736,18 +2004,6 @@ void GVN::cleanupGlobalSets() { void GVN::verifyRemoved(const Instruction *Inst) const { VN.verifyRemoved(Inst); - // Walk through the PHI map to make sure the instruction isn't hiding in there - // somewhere. - for (PhiMapType::iterator - I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { - assert(I->first != Inst && "Inst is still a key in PHI map!"); - - for (SmallPtrSet<Instruction*, 4>::iterator - II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { - assert(*II != Inst && "Inst is still a value in PHI map!"); - } - } - // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 88cf60e..e2d9e0b 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -51,11 +51,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -67,7 +67,7 @@ STATISTIC(NumReplaced, "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); namespace { - class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { + class IndVarSimplify : public LoopPass { IVUsers *IU; LoopInfo *LI; ScalarEvolution *SE; @@ -75,30 +75,30 @@ namespace { bool Changed; public: - static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} - - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addRequired<ScalarEvolution>(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); - AU.addRequired<IVUsers>(); - AU.addRequiredID(LCSSAID); - AU.addPreserved<ScalarEvolution>(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<IVUsers>(); - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() : LoopPass(&ID) {} + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired<IVUsers>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + AU.addPreserved<IVUsers>(); + AU.setPreservesCFG(); + } private: void RewriteNonIntegerIVs(Loop *L); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount, + ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, - const SCEV* BackedgeTakenCount, + const SCEV *BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - const SCEV* RHS = BackedgeTakenCount; + const SCEV *RHS = BackedgeTakenCount; if (ExitingBlock == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); - const SCEV* N = + const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); + const SCEV *N = SE->getAddExpr(BackedgeTakenCount, SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); if ((isa<SCEVConstant>(N) && !N->isZero()) || @@ -182,13 +182,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, else Opcode = ICmpInst::ICMP_EQ; - DOUT << "INDVARS: Rewriting loop exit condition to:\n" - << " LHS:" << *CmpIndVar // includes a newline - << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"; + DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *RHS << "\n"); - ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); + ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); Instruction *OrigCond = cast<Instruction>(BI->getCondition()); // It's tempting to use replaceAllUsesWith here to fully replace the old @@ -264,7 +264,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. - const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; @@ -273,25 +273,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << " LoopVal = " << *Inst << "\n"; + DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' + << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); - // If we're inserting code into the exit block rather than the - // preheader, we can (and have to) remove the PHI entirely. - // This is safe, because the NewVal won't be variant - // in the loop, so we don't need an LCSSA phi node anymore. - if (ExitBlocks.size() == 1) { + if (NumPreds == 1) { + // Completely replace a single-pred PHI. This is safe, because the + // NewVal won't be variant in the loop, so we don't need an LCSSA phi + // node anymore. PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); - break; } } - if (ExitBlocks.size() != 1) { + if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = PN->clone(); @@ -339,7 +337,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteNonIntegerIVs(L); BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE); @@ -367,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { NeedCannIV = true; } for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV* Stride = IU->StrideOrder[i]; + const SCEV *Stride = IU->StrideOrder[i]; const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -403,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumInserted; Changed = true; - DOUT << "INDVARS: New CanIV: " << *IndVar; + DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert // the old canonical-looking variable after it so that the IR remains @@ -458,9 +456,9 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // the need for the code evaluation methods to insert induction variables // of different sizes. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV* Stride = IU->StrideOrder[i]; + const SCEV *Stride = IU->StrideOrder[i]; - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); ilist<IVStrideUse> &List = SI->second->Users; @@ -471,7 +469,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, Instruction *User = UI->getUser(); // Compute the final addrec to expand into code. - const SCEV* AR = IU->getReplacementExpr(*UI); + const SCEV *AR = IU->getReplacementExpr(*UI); // FIXME: It is an extremely bad idea to indvar substitute anything more // complex than affine induction variables. Doing so will put expensive @@ -508,8 +506,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op - << " into = " << *NewVal << "\n"; + DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); ++NumRemoved; Changed = true; @@ -546,8 +544,19 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { // New instructions were inserted at the end of the preheader. if (isa<PHINode>(I)) break; - if (I->isTrapping()) + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also + // don't move instructions which might read memory, since the loop may + // modify memory. Note that it's okay if the instruction might have + // undefined behavior: LoopSimplify guarantees that the preheader + // dominates the exit block. + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) continue; + // Don't sink static AllocaInsts out of the entry block, which would + // turn them into dynamic allocas! + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (AI->isStaticAlloca()) + continue; // Determine if there is a use in or before the loop (direct or // otherwise). bool UsedInLoop = false; @@ -630,7 +639,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { // Check incoming value. ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge)); if (!InitValue) return; - uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits(); + uint64_t newInitValue = + Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) return; @@ -646,7 +656,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { IncrVIndex = 0; IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex)); if (!IncrValue) return; - uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits(); + uint64_t newIncrValue = + Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue)) return; @@ -677,7 +688,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { EVIndex = 0; EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex)); if (!EV) return; - uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits(); + uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); if (!convertToInt(EV->getValueAPF(), &intEV)) return; @@ -710,24 +721,26 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return; // Insert new integer induction variable. - PHINode *NewPHI = PHINode::Create(Type::Int32Ty, + PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()), PH->getName()+".int", PH); - NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue), + NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()), + newInitValue), PH->getIncomingBlock(IncomingEdge)); Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, - Context->getConstantInt(Type::Int32Ty, + ConstantInt::get(Type::getInt32Ty(PH->getContext()), newIncrValue), Incr->getName()+".int", Incr); NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge)); // The back edge is edge 1 of newPHI, whatever it may have been in the // original PHI. - ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV); + ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()), + intEV); Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV); Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1)); - ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), - EC->getParent()->getTerminator()); + ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), + NewPred, LHS, RHS, EC->getName()); // In the following deltions, PH may become dead and may be deleted. // Use a WeakVH to observe whether this happens. @@ -739,7 +752,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { RecursivelyDeleteTriviallyDeadInstructions(EC); // Delete old, floating point, increment instruction. - Incr->replaceAllUsesWith(Context->getUndef(Incr->getType())); + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // Replace floating induction variable, if it isn't already deleted. diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 59fbd39..7c96c49 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -40,7 +40,9 @@ #include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -48,11 +50,13 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstVisitor.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -60,7 +64,6 @@ #include "llvm/ADT/STLExtras.h" #include <algorithm> #include <climits> -#include <sstream> using namespace llvm; using namespace llvm::PatternMatch; @@ -71,29 +74,49 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); namespace { - class VISIBILITY_HIDDEN InstCombiner - : public FunctionPass, - public InstVisitor<InstCombiner, Instruction*> { - // Worklist of all of the instructions that need to be simplified. + /// InstCombineWorklist - This is the worklist management logic for + /// InstCombine. + class InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; - TargetData *TD; - bool MustPreserveLCSSA; + + void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT + InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT public: - static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID) {} - - LLVMContext* getContext() { return Context; } - - /// AddToWorkList - Add the specified instruction to the worklist if it - /// isn't already in it. - void AddToWorkList(Instruction *I) { - if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) + InstCombineWorklist() {} + + bool isEmpty() const { return Worklist.empty(); } + + /// Add - Add the specified instruction to the worklist if it isn't already + /// in it. + void Add(Instruction *I) { + if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { + DEBUG(errs() << "IC: ADD: " << *I << '\n'); Worklist.push_back(I); + } + } + + void AddValue(Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + Add(I); } - // RemoveFromWorkList - remove I from the worklist if it exists. - void RemoveFromWorkList(Instruction *I) { + /// AddInitialGroup - Add the specified batch of stuff in reverse order. + /// which should only be done when the worklist is empty and when the group + /// has no duplicates. + void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { + assert(Worklist.empty() && "Worklist must be empty to add initial group"); + Worklist.reserve(NumEntries+16); + DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); + for (; NumEntries; --NumEntries) { + Instruction *I = List[NumEntries-1]; + WorklistMap.insert(std::make_pair(I, Worklist.size())); + Worklist.push_back(I); + } + } + + // Remove - remove I from the worklist if it exists. + void Remove(Instruction *I) { DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. @@ -103,51 +126,74 @@ namespace { WorklistMap.erase(It); } - Instruction *RemoveOneFromWorkList() { + Instruction *RemoveOne() { Instruction *I = Worklist.back(); Worklist.pop_back(); WorklistMap.erase(I); return I; } - /// AddUsersToWorkList - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. /// - void AddUsersToWorkList(Value &I) { + void AddUsersToWorkList(Instruction &I) { for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ++UI) - AddToWorkList(cast<Instruction>(*UI)); - } - - /// AddUsesToWorkList - When an instruction is simplified, add operands to - /// the work lists because they might get more simplified now. - /// - void AddUsesToWorkList(Instruction &I) { - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast<Instruction>(*i)) - AddToWorkList(Op); + Add(cast<Instruction>(*UI)); } - /// AddSoonDeadInstToWorklist - The specified instruction is about to become - /// dead. Add all of its operands to the worklist, turning them into - /// undef's to reduce the number of uses of those instructions. - /// - /// Return the specified operand before it is turned into an undef. - /// - Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) { - Value *R = I.getOperand(op); - - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast<Instruction>(*i)) { - AddToWorkList(Op); - // Set the operand to undef to drop the use. - *i = Context->getUndef(Op->getType()); - } + + /// Zap - check that the worklist is empty and nuke the backing store for + /// the map if it is large. + void Zap() { + assert(WorklistMap.empty() && "Worklist empty, but map not?"); - return R; + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); } + }; +} // end anonymous namespace. + + +namespace { + /// InstCombineIRInserter - This is an IRBuilder insertion helper that works + /// just like the normal insertion helper, but also adds any new instructions + /// to the instcombine worklist. + class InstCombineIRInserter : public IRBuilderDefaultInserter<true> { + InstCombineWorklist &Worklist; + public: + InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); + Worklist.Add(I); + } + }; +} // end anonymous namespace + + +namespace { + class InstCombiner : public FunctionPass, + public InstVisitor<InstCombiner, Instruction*> { + TargetData *TD; + bool MustPreserveLCSSA; + bool MadeIRChange; + public: + /// Worklist - All of the instructions that need to be simplified. + InstCombineWorklist Worklist; + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy; + BuilderTy *Builder; + + static char ID; // Pass identification, replacement for typeid + InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + + LLVMContext *Context; + LLVMContext *getContext() const { return Context; } public: virtual bool runOnFunction(Function &F); @@ -155,12 +201,11 @@ namespace { bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } - TargetData &getTargetData() const { return *TD; } + TargetData *getTargetData() const { return TD; } // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: @@ -187,8 +232,10 @@ namespace { Instruction *visitSDiv(BinaryOperator &I); Instruction *visitFDiv(BinaryOperator &I); Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); Instruction *visitAnd(BinaryOperator &I); Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C); Instruction *visitOr (BinaryOperator &I); @@ -208,7 +255,7 @@ namespace { Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); - Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS, + Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I); @@ -269,30 +316,10 @@ namespace { "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); BB->getInstList().insert(&Old, New); // Insert inst - AddToWorkList(New); + Worklist.Add(New); return New; } - - /// InsertCastBefore - Insert a cast of V to TY before the instruction POS. - /// This also adds the cast to the worklist. Finally, this returns the - /// cast. - Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty, - Instruction &Pos) { - if (V->getType() == Ty) return V; - - if (Constant *CV = dyn_cast<Constant>(V)) - return Context->getConstantExprCast(opc, CV, Ty); - - Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos); - AddToWorkList(C); - return C; - } - Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) { - return InsertCastBefore(Instruction::BitCast, V, Ty, Pos); - } - - // ReplaceInstUsesWith - This method is to be used when an instruction is // found to be dead, replacable with another preexisting expression. Here // we add all uses of I to the worklist, replace all uses of I with the new @@ -300,16 +327,15 @@ namespace { // modified. // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - AddUsersToWorkList(I); // Add all modified instrs to worklist - if (&I != V) { - I.replaceAllUsesWith(V); - return &I; - } else { - // If we are replacing the instruction with itself, this must be in a - // segment of unreachable code, so just clobber the instruction. - I.replaceAllUsesWith(Context->getUndef(I.getType())); - return &I; - } + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + if (&I == V) + V = UndefValue::get(I.getType()); + + I.replaceAllUsesWith(V); + return &I; } // EraseInstFromFunction - When dealing with an instruction that has side @@ -317,10 +343,19 @@ namespace { // instruction. Instead, visit methods should return the value returned by // this function. Instruction *EraseInstFromFunction(Instruction &I) { + DEBUG(errs() << "IC: ERASE " << I << '\n'); + assert(I.use_empty() && "Cannot erase instruction that is used!"); - AddUsesToWorkList(I); - RemoveFromWorkList(&I); + // Make sure that we reprocess all operands now that we reduced their + // use counts. + if (I.getNumOperands() < 8) { + for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(*i)) + Worklist.Add(Op); + } + Worklist.Remove(&I); I.eraseFromParent(); + MadeIRChange = true; return 0; // Don't do anything with FI } @@ -364,10 +399,15 @@ namespace { Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt& UndefElts, unsigned Depth = 0); - // FoldOpIntoPhi - Given a binary operator or cast instruction which has a - // PHI node as operand #0, see if we can fold the instruction into the PHI - // (which is only possible if all operands to the PHI are constants). - Instruction *FoldOpIntoPhi(Instruction &I); + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select + // which has a PHI node as operand #0, see if we can fold the instruction + // into the PHI (which is only possible if all operands to the PHI are + // constants). + // + // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms + // that would normally be unprofitable because they strongly encourage jump + // threading. + Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" // operator and they all are only used by the PHI, PHI together their @@ -399,7 +439,7 @@ namespace { unsigned PrefAlign = 0); }; -} +} // end anonymous namespace char InstCombiner::ID = 0; static RegisterPass<InstCombiner> @@ -409,7 +449,8 @@ X("instcombine", "Combine redundant instructions"); // 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || + if (BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) return 3; return 4; @@ -429,7 +470,7 @@ static bool isOnlyUse(Value *V) { static const Type *getPromotedType(const Type *Ty) { if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { if (ITy->getBitWidth() < 32) - return Type::Int32Ty; + return Type::getInt32Ty(Ty->getContext()); } return Ty; } @@ -438,29 +479,12 @@ static const Type *getPromotedType(const Type *Ty) { /// expression bitcast, or a GetElementPtrInst with all zero indices, return the /// operand value, otherwise return null. static Value *getBitCastOperand(Value *V) { - if (BitCastInst *I = dyn_cast<BitCastInst>(V)) - // BitCastInst? - return I->getOperand(0); - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { - // GetElementPtrInst? - if (GEP->hasAllZeroIndices()) - return GEP->getOperand(0); - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() == Instruction::BitCast) - // BitCast ConstantExp? - return CE->getOperand(0); - else if (CE->getOpcode() == Instruction::GetElementPtr) { - // GetElementPtr ConstantExp? - for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end(); - I != E; ++I) { - ConstantInt *CI = dyn_cast<ConstantInt>(I); - if (!CI || !CI->isZero()) - // Any non-zero indices? Not cast-like. - return 0; - } - // All-zero indices? This is just like casting. - return CE->getOperand(0); - } + if (Operator *O = dyn_cast<Operator>(V)) { + if (O->getOpcode() == Instruction::BitCast) + return O->getOperand(0); + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + if (GEP->hasAllZeroIndices()) + return GEP->getPointerOperand(); } return 0; } @@ -474,7 +498,7 @@ isEliminableCastPair( const Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - + const Type *SrcTy = CI->getOperand(0)->getType(); // A from above const Type *MidTy = CI->getType(); // B from above @@ -483,12 +507,15 @@ isEliminableCastPair( Instruction::CastOps secondOp = Instruction::CastOps(opcode); unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, TD->getIntPtrType()); + DstTy, + TD ? TD->getIntPtrType(CI->getContext()) : 0); // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) || - (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType())) + if ((Res == Instruction::IntToPtr && + (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || + (Res == Instruction::PtrToInt && + (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) Res = 0; return Instruction::CastOps(Res); @@ -503,7 +530,7 @@ static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, // If this is another cast that can be eliminated, it isn't codegen either. if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opcode, Ty, TD)) return false; return true; } @@ -528,7 +555,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = Context->getConstantExpr(I.getOpcode(), + Constant *Folded = ConstantExpr::get(I.getOpcode(), cast<Constant>(I.getOperand(1)), cast<Constant>(Op->getOperand(1))); I.setOperand(0, Op->getOperand(0)); @@ -541,11 +568,11 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { Constant *C2 = cast<Constant>(Op1->getOperand(1)); // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2); + Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), Op1->getOperand(0), Op1->getName(), &I); - AddToWorkList(New); + Worklist.Add(New); I.setOperand(0, New); I.setOperand(1, Folded); return true; @@ -568,17 +595,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) { // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // -static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNegVal(Value *V) { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); // Constants can be considered to be negated values if they can be folded. if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return Context->getConstantExprNeg(C); + return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) if (C->getType()->getElementType()->isInteger()) - return Context->getConstantExprNeg(C); + return ConstantExpr::getNeg(C); return 0; } @@ -587,28 +614,28 @@ static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castFNegVal(Value *V) { if (BinaryOperator::isFNeg(V)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. if (ConstantFP *C = dyn_cast<ConstantFP>(V)) - return Context->getConstantExprFNeg(C); + return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) if (C->getType()->getElementType()->isFloatingPoint()) - return Context->getConstantExprFNeg(C); + return ConstantExpr::getFNeg(C); return 0; } -static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNotVal(Value *V) { if (BinaryOperator::isNot(V)) return BinaryOperator::getNotArgument(V); // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return Context->getConstantInt(~C->getValue()); + return ConstantInt::get(C->getType(), ~C->getValue()); return 0; } @@ -617,8 +644,7 @@ static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { // non-constant operand of the multiply, and set CST to point to the multiplier. // Otherwise, return null. // -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, - LLVMContext* Context) { +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (V->hasOneUse() && V->getType()->isInteger()) if (Instruction *I = dyn_cast<Instruction>(V)) { if (I->getOpcode() == Instruction::Mul) @@ -629,48 +655,27 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, // The multiplier is really 1 << CST. uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); uint32_t CSTVal = CST->getLimitedValue(BitWidth); - CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal)); + CST = ConstantInt::get(V->getType()->getContext(), + APInt(BitWidth, 1).shl(CSTVal)); return I->getOperand(0); } } return 0; } -/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant -/// expression, return it. -static User *dyn_castGetElementPtr(Value *V) { - if (isa<GetElementPtrInst>(V)) return cast<User>(V); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr) - return cast<User>(V); - return false; -} - -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. - return Instruction::UserOp1; -} - /// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C, LLVMContext* Context) { - return Context->getConstantExprAdd(C, - Context->getConstantInt(C->getType(), 1)); +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, + ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C, LLVMContext* Context) { - return Context->getConstantExprSub(C, - Context->getConstantInt(C->getType(), 1)); +static Constant *SubOne(ConstantInt *C) { + return ConstantExpr::getSub(C, + ConstantInt::get(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. -static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, - LLVMContext* Context) { +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { @@ -697,7 +702,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded, LLVMContext* Context) { + APInt Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -712,7 +717,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // This instruction is producing bits that are not demanded. Shrink the RHS. Demanded &= OpC->getValue(); - I->setOperand(OpNo, Context->getConstantInt(Demanded)); + I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); return true; } @@ -784,7 +789,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); if (NewVal == 0) return false; - U.set(NewVal); + U = NewVal; return true; } @@ -844,7 +849,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) return 0; - return Context->getUndef(VTy); + return UndefValue::get(VTy); } if (Depth == 6) // Limit search depth. @@ -886,7 +891,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Context->getNullValue(VTy); + return Constant::getNullValue(VTy); } else if (I->getOpcode() == Instruction::Or) { // We can simplify (X|Y) -> X or Y in the user's context if we know that @@ -955,10 +960,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Context->getNullValue(VTy); + return Constant::getNullValue(VTy); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) return I; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -995,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1030,7 +1035,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = + Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); return InsertNewInstBefore(Or, *I); @@ -1043,7 +1048,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { // all known if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask); + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); return InsertNewInstBefore(And, *I); @@ -1052,9 +1058,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS is a constant, see if we can simplify it. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; + // If our LHS is an 'and' and if it has one use, and if any of the bits we + // are flipping are known to be set, then the xor is just resetting those + // bits to zero. We can just knock out bits from the 'and' and the 'xor', + // simplifying both of them. + if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) + if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && + isa<ConstantInt>(I->getOperand(1)) && + isa<ConstantInt>(LHSInst->getOperand(1)) && + (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); + ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); + APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + + Constant *AndC = + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + Instruction *NewAnd = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + InsertNewInstBefore(NewAnd, *I); + + Constant *XorC = + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + Instruction *NewXor = + BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); + return InsertNewInstBefore(NewXor, *I); + } + + RHSKnownZero = KnownZeroOut; RHSKnownOne = KnownOneOut; break; @@ -1069,8 +1102,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) || - ShrinkDemandedConstant(I, 2, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask) || + ShrinkDemandedConstant(I, 2, DemandedMask)) return I; // Only known if known in both the LHS and RHS. @@ -1194,7 +1227,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS of the add has bits set that can't affect the input, reduce // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context)) + if (ShrinkDemandedConstant(I, 1, InDemandedBits)) return I; // Avoid excess work. @@ -1415,10 +1448,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *NewVal; if (InputBit > ResultBit) NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - Context->getConstantInt(I->getType(), InputBit-ResultBit)); + ConstantInt::get(I->getType(), InputBit-ResultBit)); else NewVal = BinaryOperator::CreateShl(I->getOperand(1), - Context->getConstantInt(I->getType(), ResultBit-InputBit)); + ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); return InsertNewInstBefore(NewVal, *I); } @@ -1434,12 +1467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - Constant *C = Context->getConstantInt(RHSKnownOne); - if (isa<PointerType>(V->getType())) - C = Context->getConstantExprIntToPtr(C, V->getType()); - return C; - } + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, RHSKnownOne); return false; } @@ -1465,13 +1494,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; - return Context->getUndef(V->getType()); + return UndefValue::get(V->getType()); } UndefElts = 0; if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) { const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Undef = Context->getUndef(EltTy); + Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) @@ -1486,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } // If we changed the constant, return it. - Constant *NewCP = Context->getConstantVector(Elts); + Constant *NewCP = ConstantVector::get(Elts); return NewCP != CP ? NewCP : 0; } else if (isa<ConstantAggregateZero>(V)) { // Simplify the CAZ to a ConstantVector where the non-demanded elements are @@ -1498,15 +1527,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Zero = Context->getNullValue(EltTy); - Constant *Undef = Context->getUndef(EltTy); + Constant *Zero = Constant::getNullValue(EltTy); + Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = DemandedElts[i] ? Zero : Undef; Elts.push_back(Elt); } UndefElts = DemandedElts ^ EltMask; - return Context->getConstantVector(Elts); + return ConstantVector::get(Elts); } // Limit search depth. @@ -1553,8 +1582,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If this is inserting an element that isn't demanded, remove this // insertelement. unsigned IdxNo = Idx->getZExtValue(); - if (IdxNo >= VWidth || !DemandedElts[IdxNo]) - return AddSoonDeadInstToWorklist(*I, 0); + if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { + Worklist.Add(I); + return I->getOperand(0); + } // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. @@ -1620,12 +1651,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, std::vector<Constant*> Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); else - Elts.push_back(Context->getConstantInt(Type::Int32Ty, + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Shuffle->getMaskValue(i))); } - I->setOperand(2, Context->getConstantVector(Elts)); + I->setOperand(2, ConstantVector::get(Elts)); MadeChange = true; } break; @@ -1678,7 +1709,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = UndefElts2; if (VWidth > InVWidth) { - assert(0 && "Unimp"); + llvm_unreachable("Unimp"); // If there are more elements in the result than there are in the source, // then an output element is undef if the corresponding input element is // undef. @@ -1686,7 +1717,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (UndefElts2[OutIdx/Ratio]) UndefElts.set(OutIdx); } else if (VWidth < InVWidth) { - assert(0 && "Unimp"); + llvm_unreachable("Unimp"); // If there are more elements in the source than there are in the result, // then a result element is undef if all of the corresponding input // elements are undef. @@ -1752,11 +1783,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Value *LHS = II->getOperand(1); Value *RHS = II->getOperand(2); // Extract the element as scalars. - LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II); - RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II); + LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); + RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); switch (II->getIntrinsicID()) { - default: assert(0 && "Case stmts out of sync!"); + default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, @@ -1771,9 +1804,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Instruction *New = InsertElementInst::Create( - Context->getUndef(II->getType()), TmpV, 0U, II->getName()); + UndefValue::get(II->getType()), TmpV, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName()); InsertNewInstBefore(New, *II); - AddSoonDeadInstToWorklist(*II, 0); return New; } } @@ -1799,8 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, /// 'shouldApply' and 'apply' methods. /// template<typename Functor> -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, - LLVMContext* Context) { +static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { unsigned Opcode = Root.getOpcode(); Value *LHS = Root.getOperand(0); @@ -1833,7 +1865,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, // Make what used to be the LHS of the root be the user of the root... Value *ExtraOperand = TmpLHSI->getOperand(1); if (&Root == TmpLHSI) { - Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType())); + Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); return 0; } Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI @@ -1872,12 +1904,11 @@ namespace { // AddRHS - Implements: X + X --> X << 1 struct AddRHS { Value *RHS; - LLVMContext* Context; - AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {} + explicit AddRHS(Value *rhs) : RHS(rhs) {} bool shouldApply(Value *LHS) const { return LHS == RHS; } Instruction *apply(BinaryOperator &Add) const { return BinaryOperator::CreateShl(Add.getOperand(0), - Context->getConstantInt(Add.getType(), 1)); + ConstantInt::get(Add.getType(), 1)); } }; @@ -1885,12 +1916,11 @@ struct AddRHS { // iff C1&C2 == 0 struct AddMaskingAnd { Constant *C2; - LLVMContext* Context; - AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {} + explicit AddMaskingAnd(Constant *c) : C2(c) {} bool shouldApply(Value *LHS) const { ConstantInt *C1; return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && - Context->getConstantExprAnd(C1, C2)->isNullValue(); + ConstantExpr::getAnd(C1, C2)->isNullValue(); } Instruction *apply(BinaryOperator &Add) const { return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); @@ -1901,11 +1931,8 @@ struct AddMaskingAnd { static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner *IC) { - LLVMContext* Context = IC->getContext(); - - if (CastInst *CI = dyn_cast<CastInst>(&I)) { - return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I); - } + if (CastInst *CI = dyn_cast<CastInst>(&I)) + return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); // Figure out if the constant is the left or the right argument. bool ConstIsRHS = isa<Constant>(I.getOperand(1)); @@ -1913,24 +1940,24 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, if (Constant *SOC = dyn_cast<Constant>(SO)) { if (ConstIsRHS) - return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand); - return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC); + return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); + return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); } Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); - Instruction *New; + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op"); - else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - else { - assert(0 && "Unknown binary instruction type!"); - abort(); - } - return IC->InsertNewInstBefore(New, I); + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); + if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + llvm_unreachable("Unknown binary instruction type!"); } // FoldOpIntoSelect - Given an instruction with a select as one operand and a @@ -1946,7 +1973,7 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType() == Type::Int1Ty) return 0; + if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); @@ -1958,20 +1985,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, } -/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI -/// node as operand #0, see if we can fold the instruction into the PHI (which -/// is only possible if all operands to the PHI are constants). -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { +/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which +/// has a PHI node as operand #0, see if we can fold the instruction into the +/// PHI (which is only possible if all operands to the PHI are constants). +/// +/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms +/// that would normally be unprofitable because they strongly encourage jump +/// threading. +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, + bool AllowAggressive) { + AllowAggressive = false; PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); - if (!PN->hasOneUse() || NumPHIValues == 0) return 0; - - // Check to see if all of the operands of the PHI are constants. If there is - // one non-constant value, remember the BB it is. If there is more than one - // or if *it* is a PHI, bail out. + if (NumPHIValues == 0 || + // We normally only transform phis with a single use, unless we're trying + // hard to make jump threading happen. + (!PN->hasOneUse() && !AllowAggressive)) + return 0; + + + // Check to see if all of the operands of the PHI are simple constants + // (constantint/constantfp/undef). If there is one non-constant value, + // remember the BB it is in. If there is more than one or if *it* is a PHI, + // bail out. We don't do arbitrary constant expressions here because moving + // their computation can be expensive without a cost model. BasicBlock *NonConstBB = 0; for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa<Constant>(PN->getIncomingValue(i))) { + if (!isa<Constant>(PN->getIncomingValue(i)) || + isa<ConstantExpr>(PN->getIncomingValue(i))) { if (NonConstBB) return 0; // More than one non-const value. if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. NonConstBB = PN->getIncomingBlock(i); @@ -1986,7 +2027,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB) { + if (NonConstBB != 0 && !AllowAggressive) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return 0; } @@ -1998,15 +2039,37 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { NewPN->takeName(PN); // Next, add all of the operands to the PHI. - if (I.getNumOperands() == 2) { + if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { + // We only currently try to fold the condition of a select when it is a phi, + // not the true/false values. + Value *TrueV = SI->getTrueValue(); + Value *FalseV = SI->getFalseValue(); + BasicBlock *PhiTransBB = PN->getParent(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + BasicBlock *ThisBB = PN->getIncomingBlock(i); + Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); + Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, + "phitmp", NonConstBB->getTerminator()); + Worklist.Add(cast<Instruction>(InV)); + } + NewPN->addIncoming(InV, ThisBB); + } + } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = 0; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C); + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else - InV = Context->getConstantExpr(I.getOpcode(), InC, C); + InV = ConstantExpr::get(I.getOpcode(), InC, C); } else { assert(PN->getIncomingBlock(i) == NonConstBB); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) @@ -2014,14 +2077,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PN->getIncomingValue(i), C, "phitmp", NonConstBB->getTerminator()); else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = CmpInst::Create(CI->getOpcode(), + InV = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), PN->getIncomingValue(i), C, "phitmp", NonConstBB->getTerminator()); else - assert(0 && "Unknown binop!"); + llvm_unreachable("Unknown binop!"); - AddToWorkList(cast<Instruction>(InV)); + Worklist.Add(cast<Instruction>(InV)); } NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } @@ -2031,13 +2094,13 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy); + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); } else { assert(PN->getIncomingBlock(i) == NonConstBB); InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), I.getType(), "phitmp", NonConstBB->getTerminator()); - AddToWorkList(cast<Instruction>(InV)); + Worklist.Add(cast<Instruction>(InV)); } NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } @@ -2098,13 +2161,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - // zext(i1) - 1 -> select i1, 0, -1 + // zext(bool) + C -> bool ? C + 1 : C if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) - if (CI->isAllOnesValue() && - ZI->getOperand(0)->getType() == Type::Int1Ty) - return SelectInst::Create(ZI->getOperand(0), - Context->getNullValue(I.getType()), - Context->getConstantIntAllOnesValue(I.getType())); + if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); } if (isa<PHINode>(LHS)) @@ -2146,24 +2206,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const Type *MiddleType = 0; switch (Size) { default: break; - case 32: MiddleType = Type::Int32Ty; break; - case 16: MiddleType = Type::Int16Ty; break; - case 8: MiddleType = Type::Int8Ty; break; + case 32: MiddleType = Type::getInt32Ty(*Context); break; + case 16: MiddleType = Type::getInt16Ty(*Context); break; + case 8: MiddleType = Type::getInt8Ty(*Context); break; } if (MiddleType) { - Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext"); - InsertNewInstBefore(NewTrunc, I); + Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); return new SExtInst(NewTrunc, I.getType(), I.getName()); } } } - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 if (I.getType()->isInteger()) { - if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context)) + if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result; if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { @@ -2180,11 +2239,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS, Context)) { + if (Value *LHSV = dyn_castNegVal(LHS)) { if (LHS->getType()->isIntOrIntVector()) { - if (Value *RHSV = dyn_castNegVal(RHS, Context)) { - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum"); - InsertNewInstBefore(NewAdd, I); + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } } @@ -2194,34 +2252,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // A + -B --> A - B if (!isa<Constant>(RHS)) - if (Value *V = dyn_castNegVal(RHS, Context)) + if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); ConstantInt *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) { + if (Value *X = dyn_castFoldableMul(LHS, C2)) { if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2, Context)); + return BinaryOperator::CreateMul(RHS, AddOne(C2)); // X*C1 + X*C2 --> X * (C1+C2) ConstantInt *C1; - if (X == dyn_castFoldableMul(RHS, C1, Context)) - return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2)); + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); } // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2, Context) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2, Context)); + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS, Context) == RHS || - dyn_castNotVal(RHS, Context) == LHS) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + if (dyn_castNotVal(LHS) == RHS || + dyn_castNotVal(RHS) == LHS) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) - if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2, Context), Context)) + if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) return R; // A+B --> A|B iff A and B have no bits set in common. @@ -2258,8 +2316,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } if (W == Y) { - Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z, - LHS->getName()), I); + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); return BinaryOperator::CreateMul(W, NewAdd); } } @@ -2268,11 +2325,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { Value *X = 0; if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS, Context), X); + return BinaryOperator::CreateSub(SubOne(CRHS), X); // (X & FF00) + xx00 -> (X+xx00) & FF00 - if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = Context->getConstantExprAnd(CRHS, C2); + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); if (Anded == CRHS) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. @@ -2286,8 +2344,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS, - LHS->getName()), I); + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -2299,28 +2356,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return R; } - // add (cast *A to intptrtype) B -> - // cast (GEP (cast *A to i8*) B) --> intptrtype - { - CastInst *CI = dyn_cast<CastInst>(LHS); - Value *Other = RHS; - if (!CI) { - CI = dyn_cast<CastInst>(RHS); - Other = LHS; - } - if (CI && CI->getType()->isSized() && - (CI->getType()->getScalarSizeInBits() == - TD->getIntPtrType()->getPrimitiveSizeInBits()) - && isa<PointerType>(CI->getOperand(0)->getType())) { - unsigned AS = - cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace(); - Value *I2 = InsertBitCastBefore(CI->getOperand(0), - Context->getPointerType(Type::Int8Ty, AS), I); - I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I); - return new PtrToIntInst(I2, CI->getType()); - } - } - // add (select X 0 (sub n A)) A --> select X A n { SelectInst *SI = dyn_cast<SelectInst>(LHS); @@ -2336,10 +2371,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Can we fold the add into the argument of the select? // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(FV, m_Zero()) && + match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(TV, m_Zero()) && + match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); } @@ -2351,14 +2388,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // (add (sext x), cst) --> (sext (add x, cst')) if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { Constant *CI = - Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType()); + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && - Context->getConstantExprSExt(CI, I.getType()) == RHSC && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - CI, "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -2373,10 +2409,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // X + 0 --> X if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->isExactlyValue(Context->getConstantFPNegativeZero + if (CFP->isExactlyValue(ConstantFP::getNegativeZero (I.getType())->getValueAPF())) return ReplaceInstUsesWith(I, LHS); } @@ -2404,12 +2438,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castFNegVal(LHS, Context)) + if (Value *LHSV = dyn_castFNegVal(LHS)) return BinaryOperator::CreateFSub(RHS, LHSV); // A + -B --> A - B if (!isa<Constant>(RHS)) - if (Value *V = dyn_castFNegVal(RHS, Context)) + if (Value *V = dyn_castFNegVal(RHS)) return BinaryOperator::CreateFSub(LHS, V); // Check for X+0.0. Simplify it to X if we know X is not -0.0. @@ -2427,14 +2461,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // instcombined. if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { Constant *CI = - Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType()); + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && - Context->getConstantExprSIToFP(CI, I.getType()) == CFP && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new integer add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - CI, "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -2449,10 +2482,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -2465,10 +2496,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castNegVal(Op1, Context)) + if (Value *V = dyn_castNegVal(Op1)) return BinaryOperator::CreateAdd(Op0, V); if (isa<UndefValue>(Op0)) @@ -2484,7 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, AddOne(C, Context)); + return BinaryOperator::CreateAdd(X, AddOne(C)); // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) @@ -2519,22 +2550,29 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) if (Instruction *R = FoldOpIntoSelect(I, SI, this)) return R; + + // C - zext(bool) -> bool ? C - 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) + if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) + return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); } - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return BinaryOperator::CreateXor(Op0, Op1); if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { if (Op1I->getOpcode() == Instruction::Add) { if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName()); + return BinaryOperator::CreateNeg(Op1I->getOperand(1), + I.getName()); else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName()); + return BinaryOperator::CreateNeg(Op1I->getOperand(0), + I.getName()); else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) // C1-(X+C2) --> (C1-C2)-X return BinaryOperator::CreateSub( - Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0)); + ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); } } @@ -2558,8 +2596,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - Value *NewNot = - InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I); + Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); return BinaryOperator::CreateAnd(Op0, NewNot); } @@ -2569,13 +2606,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (CSI->isZero()) if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - Context->getConstantExprNeg(DivRHS)); + ConstantExpr::getNeg(DivRHS)); // X - X*C --> X * (1-C) ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) { + if (dyn_castFoldableMul(Op1I, C2) == Op0) { Constant *CP1 = - Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1), + ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), C2); return BinaryOperator::CreateMul(Op0, CP1); } @@ -2590,18 +2627,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0I->getOperand(0)); } else if (Op0I->getOpcode() == Instruction::Sub) { if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); + return BinaryOperator::CreateNeg(Op0I->getOperand(1), + I.getName()); } } ConstantInt *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) { + if (Value *X = dyn_castFoldableMul(Op0, C1)) { if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1, Context)); + return BinaryOperator::CreateMul(Op1, SubOne(C1)); ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2, Context)) - return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2)); + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } return 0; } @@ -2610,15 +2648,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castFNegVal(Op1, Context)) + if (Value *V = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFAdd(Op0, V); if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { if (Op1I->getOpcode() == Instruction::FAdd) { if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName()); + return BinaryOperator::CreateFNeg(Op1I->getOperand(1), + I.getName()); else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); + return BinaryOperator::CreateFNeg(Op1I->getOperand(0), + I.getName()); } } @@ -2657,26 +2697,24 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // TODO: If Op1 is undef and Op0 is finite, return zero. - if (!I.getType()->isFPOrFPVector() && - isa<UndefValue>(I.getOperand(1))) // undef * X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + if (isa<UndefValue>(Op1)) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - // Simplify mul instructions with a constant RHS... - if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + // Simplify mul instructions with a constant RHS. + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { // ((X << C1)*C2) == (X * (C2 << C1)) if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) if (SI->getOpcode() == Instruction::Shl) if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) return BinaryOperator::CreateMul(SI->getOperand(0), - Context->getConstantExprShl(CI, ShOp)); + ConstantExpr::getShl(CI, ShOp)); if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1); // X * 0 == 0 + return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 if (CI->equalsInt(1)) // X * 1 == X return ReplaceInstUsesWith(I, Op0); if (CI->isAllOnesValue()) // X * -1 == 0 - X @@ -2685,12 +2723,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { const APInt& Val = cast<ConstantInt>(CI)->getValue(); if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C return BinaryOperator::CreateShl(Op0, - Context->getConstantInt(Op0->getType(), Val.logBase2())); + ConstantInt::get(Op0->getType(), Val.logBase2())); } - } else if (isa<VectorType>(Op1->getType())) { - // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros. + } else if (isa<VectorType>(Op1C->getType())) { + if (Op1C->isNullValue()) + return ReplaceInstUsesWith(I, Op1C); - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { if (Op1V->isAllOnesValue()) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); @@ -2705,13 +2744,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) { + isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0), - Op1, "tmp"); - InsertNewInstBefore(Add, I); - Value *C1C2 = Context->getConstantExprMul(Op1, - cast<Constant>(Op0I->getOperand(1))); + Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); + Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); return BinaryOperator::CreateAdd(Add, C1C2); } @@ -2726,93 +2762,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return NV; } - if (Value *Op0v = dyn_castNegVal(Op0, Context)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context)) + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) return BinaryOperator::CreateMul(Op0v, Op1v); // (X / Y) * Y = X - (X % Y) // (X / Y) * -Y = (X % Y) - X { - Value *Op1 = I.getOperand(1); + Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { - Op1 = Op0; - BO = dyn_cast<BinaryOperator>(I.getOperand(1)); + Op1C = Op0; + BO = dyn_cast<BinaryOperator>(Op1); } - Value *Neg = dyn_castNegVal(Op1, Context); + Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && - (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - Instruction *Rem; + // If the division is exact, X % Y is zero. + if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } + + Value *Rem; if (BO->getOpcode() == Instruction::UDiv) - Rem = BinaryOperator::CreateURem(Op0BO, Op1BO); + Rem = Builder->CreateURem(Op0BO, Op1BO); else - Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO); - - InsertNewInstBefore(Rem, I); + Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); - if (Op1BO == Op1) + if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); - else - return BinaryOperator::CreateSub(Rem, Op0BO); + return BinaryOperator::CreateSub(Rem, Op0BO); } } - if (I.getType() == Type::Int1Ty) - return BinaryOperator::CreateAnd(Op0, I.getOperand(1)); + /// i1 mul -> i1 and. + if (I.getType() == Type::getInt1Ty(*Context)) + return BinaryOperator::CreateAnd(Op0, Op1); + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. - // See if we can simplify things based on how the boolean was originally - // formed. - CastInst *BoolCast = 0; - if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0)) - if (CI->getOperand(0)->getType() == Type::Int1Ty) - BoolCast = CI; - if (!BoolCast) - if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1))) - if (CI->getOperand(0)->getType() == Type::Int1Ty) - BoolCast = CI; - if (BoolCast) { - if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) { - Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); - const Type *SCOpTy = SCIOp0->getType(); - bool TIS = false; - - // If the icmp is true iff the sign bit of X is set, then convert this - // multiply into a shift/and combination. - if (isa<ConstantInt>(SCIOp1) && - isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) && - TIS) { - // Shift the X value right to turn it into "all signbits". - Constant *Amt = Context->getConstantInt(SCIOp0->getType(), - SCOpTy->getPrimitiveSizeInBits()-1); - Value *V = - InsertNewInstBefore( - BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt, - BoolCast->getOperand(0)->getName()+ - ".mask"), I); - - // If the multiply type is not the same as the source type, sign extend - // or truncate to the multiply type. - if (I.getType() != V->getType()) { - uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits(); - uint32_t DstBits = I.getType()->getPrimitiveSizeInBits(); - Instruction::CastOps opcode = - (SrcBits == DstBits ? Instruction::BitCast : - (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc)); - V = InsertCastBefore(opcode, V, I.getType(), I); - } + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!isa<VectorType>(I.getType())) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); + + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; - Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0; - return BinaryOperator::CreateAnd(V, OtherOp); - } + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast, "tmp"); + return BinaryOperator::CreateAnd(V, OtherOp); } } @@ -2821,17 +2844,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Simplify mul instructions with a constant RHS... - if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) { - if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa<VectorType>(Op1->getType())) { - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { + } else if (isa<VectorType>(Op1C->getType())) { + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { // As above, vector X*splat(1.0) -> X in all defined cases. if (Constant *Splat = Op1V->getSplatValue()) { if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) @@ -2851,8 +2874,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return NV; } - if (Value *Op0v = dyn_castFNegVal(Op0, Context)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context)) + if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFMul(Op0v, Op1v); return Changed ? &I : 0; @@ -2907,11 +2930,11 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { I != E; ++I) { if (*I == SI) { *I = SI->getOperand(NonNullOperand); - AddToWorkList(BBI); + Worklist.Add(BBI); } else if (*I == SelectCond) { - *I = NonNullOperand == 1 ? Context->getConstantIntTrue() : - Context->getConstantIntFalse(); - AddToWorkList(BBI); + *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) : + ConstantInt::getFalse(*Context); + Worklist.Add(BBI); } } @@ -2942,7 +2965,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { if (isa<UndefValue>(Op0)) { if (Op0->getType()->isFPOrFPVector()) return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // X / undef -> undef @@ -2962,12 +2985,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // (sdiv X, X) --> 1 (udiv X, X) --> 1 if (Op0 == Op1) { if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - Constant *CI = Context->getConstantInt(Ty->getElementType(), 1); + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); std::vector<Constant*> Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, Context->getConstantVector(Elts)); + return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); } - Constant *CI = Context->getConstantInt(I.getType(), 1); + Constant *CI = ConstantInt::get(I.getType(), 1); return ReplaceInstUsesWith(I, CI); } @@ -2989,11 +3012,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv, Context)) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); else return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - Context->getConstantExprMul(RHS, LHSRHS)); + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -3009,10 +3032,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // 0 / X == 0, we don't need to preserve faults! if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -3038,14 +3061,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // if so, convert to a right shift. if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 return BinaryOperator::CreateLShr(Op0, - Context->getConstantInt(Op0->getType(), C->getValue().logBase2())); + ConstantInt::get(Op0->getType(), C->getValue().logBase2())); // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C), - I); - return SelectInst::Create(IC, Context->getNullValue(I.getType()), - Context->getConstantInt(I.getType(), 1)); + Value *IC = Builder->CreateICmpULT( Op0, C); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); } } @@ -3057,10 +3079,8 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (C1.isPowerOf2()) { Value *N = RHSI->getOperand(1); const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) { - Constant *C2V = Context->getConstantInt(NTy, C2); - N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I); - } + if (uint32_t C2 = C1.logBase2()) + N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); return BinaryOperator::CreateLShr(Op0, N); } } @@ -3076,16 +3096,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Compute the shift amounts uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); // Construct the "on true" case of the select - Constant *TC = Context->getConstantInt(Op0->getType(), TSA); - Instruction *TSI = BinaryOperator::CreateLShr( - Op0, TC, SI->getName()+".t"); - TSI = InsertNewInstBefore(TSI, I); + Constant *TC = ConstantInt::get(Op0->getType(), TSA); + Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); // Construct the "on false" case of the select - Constant *FC = Context->getConstantInt(Op0->getType(), FSA); - Instruction *FSI = BinaryOperator::CreateLShr( - Op0, FC, SI->getName()+".f"); - FSI = InsertNewInstBefore(FSI, I); + Constant *FC = ConstantInt::get(Op0->getType(), FSA); + Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); // construct the select instruction and return it. return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); @@ -3105,17 +3121,45 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // sdiv X, -1 == -X if (RHS->isAllOnesValue()) return BinaryOperator::CreateNeg(Op0); + + // sdiv X, C --> ashr X, log2(C) + if (cast<SDivOperator>(&I)->isExact() && + RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + } + + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) + if (isa<Constant>(Sub->getOperand(0)) && + cast<Constant>(Sub->getOperand(0))->isNullValue() && + Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); } // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. if (I.getType()->isInteger()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + ConstantInt *ShiftedInt; + if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && + ShiftedInt->getValue().isPowerOf2()) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } } - } + } return 0; } @@ -3134,7 +3178,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { if (isa<UndefValue>(Op0)) { // undef % X -> 0 if (I.getType()->isFPOrFPVector()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } if (isa<UndefValue>(Op1)) return ReplaceInstUsesWith(I, Op1); // X % undef -> undef @@ -3159,15 +3203,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { // 0 % X == 0 for integer, we don't need to preserve faults! if (Constant *LHS = dyn_cast<Constant>(Op0)) if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { // X % 0 == undef, we don't need to preserve faults! if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Context->getUndef(I.getType())); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { @@ -3199,7 +3243,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // if so, convert to a bitwise and. if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C, Context)); + return BinaryOperator::CreateAnd(Op0, SubOne(C)); } if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { @@ -3207,9 +3251,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (RHSI->getOpcode() == Instruction::Shl && isa<ConstantInt>(RHSI->getOperand(0))) { if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Context->getConstantIntAllOnesValue(I.getType()); - Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1, - "tmp"), I); + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); return BinaryOperator::CreateAnd(Op0, Add); } } @@ -3223,12 +3266,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // STO == 0 and SFO == 0 handled above. if ((STO->getValue().isPowerOf2()) && (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = InsertNewInstBefore( - BinaryOperator::CreateAnd(Op0, SubOne(STO, Context), - SI->getName()+".t"), I); - Value *FalseAnd = InsertNewInstBefore( - BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context), - SI->getName()+".f"), I); + Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), + SI->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), + SI->getName()+".f"); return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); } } @@ -3241,15 +3282,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Handle the integer rem common cases - if (Instruction *common = commonIRemTransforms(I)) - return common; + if (Instruction *Common = commonIRemTransforms(I)) + return Common; - if (Value *RHSNeg = dyn_castNegVal(Op1, Context)) + if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa<Constant>(RHSNeg) || (isa<ConstantInt>(RHSNeg) && cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { // X % -Y -> X % Y - AddUsesToWorkList(I); + Worklist.AddValue(I.getOperand(1)); I.setOperand(1, RHSNeg); return &I; } @@ -3279,15 +3320,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { if (RHS->getValue().isNegative()) - Elts[i] = cast<ConstantInt>(Context->getConstantExprNeg(RHS)); + Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; } } - Constant *NewRHSV = Context->getConstantVector(Elts); + Constant *NewRHSV = ConstantVector::get(Elts); if (NewRHSV != RHSV) { - AddUsesToWorkList(I); + Worklist.AddValue(I.getOperand(1)); I.setOperand(1, NewRHSV); return &I; } @@ -3351,7 +3392,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) { case ICmpInst::ICMP_SLE: return 6; // 110 // True -> 7 default: - assert(0 && "Invalid ICmp predicate!"); + llvm_unreachable("Invalid ICmp predicate!"); return 0; } } @@ -3379,7 +3420,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { // True -> 7 default: // Not expecting FCMP_FALSE and FCMP_TRUE; - assert(0 && "Unexpected FCmp predicate!"); + llvm_unreachable("Unexpected FCmp predicate!"); return 0; } } @@ -3389,10 +3430,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, - LLVMContext* Context) { + LLVMContext *Context) { switch (code) { - default: assert(0 && "Illegal ICmp code!"); - case 0: return Context->getConstantIntFalse(); + default: llvm_unreachable("Illegal ICmp code!"); + case 0: return ConstantInt::getFalse(*Context); case 1: if (sign) return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); @@ -3415,7 +3456,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); else return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: return Context->getConstantIntTrue(); + case 7: return ConstantInt::getTrue(*Context); } } @@ -3423,9 +3464,9 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, /// opcode and two operands into either a FCmp instruction. isordered is passed /// in to determine which kind of predicate to use in the new fcmp instruction. static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS, LLVMContext* Context) { + Value *LHS, Value *RHS, LLVMContext *Context) { switch (code) { - default: assert(0 && "Illegal FCmp code!"); + default: llvm_unreachable("Illegal FCmp code!"); case 0: if (isordered) return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); @@ -3461,7 +3502,7 @@ static Value *getFCmpValue(bool isordered, unsigned code, return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); else return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return Context->getConstantIntTrue(); + case 7: return ConstantInt::getTrue(*Context); } } @@ -3504,7 +3545,7 @@ struct FoldICmpLogical { case Instruction::And: Code = LHSCode & RHSCode; break; case Instruction::Or: Code = LHSCode | RHSCode; break; case Instruction::Xor: Code = LHSCode ^ RHSCode; break; - default: assert(0 && "Illegal logical opcode!"); return 0; + default: llvm_unreachable("Illegal logical opcode!"); return 0; } bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || @@ -3529,14 +3570,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Value *X = Op->getOperand(0); Constant *Together = 0; if (!Op->isShift()) - Together = Context->getConstantExprAnd(AndRHS, OpRHS); + Together = ConstantExpr::getAnd(AndRHS, OpRHS); switch (Op->getOpcode()) { case Instruction::Xor: if (Op->hasOneUse()) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Instruction *And = BinaryOperator::CreateAnd(X, AndRHS); - InsertNewInstBefore(And, TheAnd); + Value *And = Builder->CreateAnd(X, AndRHS); And->takeName(Op); return BinaryOperator::CreateXor(And, Together); } @@ -3547,8 +3587,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, if (Op->hasOneUse() && Together != OpRHS) { // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Instruction *Or = BinaryOperator::CreateOr(X, Together); - InsertNewInstBefore(Or, TheAnd); + Value *Or = Builder->CreateOr(X, Together); Or->takeName(Op); return BinaryOperator::CreateAnd(Or, AndRHS); } @@ -3578,8 +3617,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return &TheAnd; } else { // Pull the XOR out of the AND. - Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS); - InsertNewInstBefore(NewAnd, TheAnd); + Value *NewAnd = Builder->CreateAnd(X, AndRHS); NewAnd->takeName(Op); return BinaryOperator::CreateXor(NewAnd, AndRHS); } @@ -3595,7 +3633,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask); + ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask); if (CI->getValue() == ShlMask) { // Masking out bits that the shift already masks @@ -3615,7 +3653,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask); + ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask); if (CI->getValue() == ShrMask) { // Masking out bits that the shift already masks. @@ -3634,14 +3672,12 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask); + Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = InsertNewInstBefore( - BinaryOperator::CreateLShr(ShVal, OpRHS, - Op->getName()), TheAnd); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } @@ -3659,7 +3695,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside, Instruction &IB) { - assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ? + assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); @@ -3675,10 +3711,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, } // Emit V-Lo <u Hi-Lo - Constant *NegLo = Context->getConstantExprNeg(Lo); - Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); - InsertNewInstBefore(Add, IB); - Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi); + Constant *NegLo = ConstantExpr::getNeg(Lo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); } @@ -3686,7 +3721,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, return new ICmpInst(ICmpInst::ICMP_EQ, V, V); // V < Min || V >= Hi -> V > Hi-1 - Hi = SubOne(cast<ConstantInt>(Hi), Context); + Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); @@ -3695,10 +3730,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, // Emit V-Lo >u Hi-1-Lo // Note that Hi has already had one subtracted from it, above. - ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo)); - Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); - InsertNewInstBefore(Add, IB); - Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi); + ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); } @@ -3740,7 +3774,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, switch (LHSI->getOpcode()) { default: return 0; case Instruction::And: - if (Context->getConstantExprAnd(N, Mask) == Mask) { + if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == @@ -3764,17 +3798,14 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && Context->getConstantExprAnd(N, Mask)->isNullValue()) + && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; return 0; } - Instruction *New; if (isSub) - New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold"); - else - New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold"); - return InsertNewInstBefore(New, I); + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. @@ -3785,16 +3816,17 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, ICmpInst::Predicate LHSCC, RHSCC; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), + m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), + m_ConstantInt(RHSCst)))) return 0; // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) // where C is a power of 2 if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) { - Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2); - InsertNewInstBefore(NewOr, I); + Value *NewOr = Builder->CreateOr(Val, Val2); return new ICmpInst(LHSCC, NewOr, LHSCst); } @@ -3837,14 +3869,14 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, assert(LHSCst != RHSCst && "Compares not folded above?"); switch (LHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -3852,13 +3884,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, } case ICmpInst::ICMP_NE: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: - if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13 + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: - if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13 + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 @@ -3866,23 +3898,21 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_NE: - if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1 - Constant *AddCST = Context->getConstantExprNeg(LHSCst); - Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, - Val->getName()+".off"); - InsertNewInstBefore(Add, I); + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); return new ICmpInst(ICmpInst::ICMP_UGT, Add, - Context->getConstantInt(Add->getType(), 1)); + ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change } break; case ICmpInst::ICMP_ULT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 @@ -3894,10 +3924,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_SLT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -3909,18 +3939,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_UGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X u> 14 + if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 return new ICmpInst(LHSCC, Val, RHSCst); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst, Context), + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true, I); case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change break; @@ -3928,18 +3958,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_SGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14 + if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 return new ICmpInst(LHSCC, Val, RHSCst); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst, Context), + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true, I); case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change break; @@ -3950,13 +3980,89 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, return 0; } +Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp ord x,x" is "fcmp ord x, 0". + if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && + isa<ConstantAggregateZero>(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, LHS); + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, RHS); + + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast<Instruction>(getFCmpValue(true, Op1Pred, + Op0LHS, Op0RHS, Context)); + } + } + + return 0; +} + Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op1)) // X & undef -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // and X, X = X if (Op0 == Op1) @@ -3976,36 +4082,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { - const APInt& AndRHSMask = AndRHS->getValue(); + const APInt &AndRHSMask = AndRHS->getValue(); APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... - if (isa<BinaryOperator>(Op0)) { - Instruction *Op0I = cast<Instruction>(Op0); + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { Value *Op0LHS = Op0I->getOperand(0); Value *Op0RHS = Op0I->getOperand(1); switch (Op0I->getOpcode()) { + default: break; case Instruction::Xor: case Instruction::Or: // If the mask is only needed on one incoming arm, push it up. - if (Op0I->hasOneUse()) { - if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { - // Not masking anything out for the LHS, move to RHS. - Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); - InsertNewInstBefore(NewRHS, I); - return BinaryOperator::Create( - cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS); - } - if (!isa<Constant>(Op0RHS) && - MaskedValueIsZero(Op0RHS, NotAndRHS)) { - // Not masking anything out for the RHS, move to LHS. - Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); - InsertNewInstBefore(NewLHS, I); - return BinaryOperator::Create( - cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS); - } + if (!Op0I->hasOneUse()) break; + + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa<Constant>(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } break; @@ -4036,8 +4138,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); if (!(A && A->isZero()) && // avoid infinite recursion. MaskedValueIsZero(Op0LHS, Mask)) { - Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS); - InsertNewInstBefore(NewNeg, I); + Value *NewNeg = Builder->CreateNeg(Op0RHS); return BinaryOperator::CreateAnd(NewNeg, AndRHS); } } @@ -4048,9 +4149,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (1 << x) & 1 --> zext(x == 0) // (1 >> x) & 1 --> zext(x == 0) if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS, - Context->getNullValue(I.getType())); - InsertNewInstBefore(NewICmp, I); + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); } break; @@ -4072,21 +4172,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (cast X to T), trunc_or_bitcast(C1)&C2 // This will fold the two constants together, which may allow // other simplifications. - Instruction *NewCast = CastInst::CreateTruncOrBitCast( + Value *NewCast = Builder->CreateTruncOrBitCast( CastOp->getOperand(0), I.getType(), CastOp->getName()+".shrunk"); - NewCast = InsertNewInstBefore(NewCast, I); // trunc_or_bitcast(C1)&C2 - Constant *C3 = - Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); - C3 = Context->getConstantExprAnd(C3, AndRHS); + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); } else if (CastOp->getOpcode() == Instruction::Or) { // Change: and (cast (or X, C1) to T), C2 // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = - Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); - if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS) + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) // trunc(C1)&C2 return ReplaceInstUsesWith(I, AndRHS); } @@ -4103,17 +4200,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return NV; } - Value *Op0NotVal = dyn_castNotVal(Op0, Context); - Value *Op1NotVal = dyn_castNotVal(Op1, Context); + Value *Op0NotVal = dyn_castNotVal(Op0); + Value *Op1NotVal = dyn_castNotVal(Op1); if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // (~A & ~B) == (~(A | B)) - De Morgan's Law if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - InsertNewInstBefore(Or, I); + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(Or); } @@ -4159,11 +4255,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { cast<BinaryOperator>(Op1)->swapOperands(); std::swap(A, B); } - if (A == Op0) { // A&(A^B) -> A & ~B - Instruction *NotB = BinaryOperator::CreateNot(B, "tmp"); - InsertNewInstBefore(NotB, I); - return BinaryOperator::CreateAnd(A, NotB); - } + if (A == Op0) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } // (A&((~A)|B)) -> A&B @@ -4177,7 +4270,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) { // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) @@ -4190,16 +4283,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && // Only do this if the casts both really cause code to be generated. ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -4210,10 +4302,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0), - SI1->getOperand(0), - SI0->getName()), I); + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -4221,66 +4312,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If and'ing two fcmp, try combine them into one. if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) { - if (LHS->getPredicate() == FCmpInst::FCMP_ORD && - RHS->getPredicate() == FCmpInst::FCMP_ORD) { - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // false. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0), - RHS->getOperand(0)); - } - } else { - Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; - FCmpInst::Predicate Op0CC, Op1CC; - if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && - match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - else if (Op0CC == FCmpInst::FCMP_FALSE || - Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - else if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Op1); - else if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Op0); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op1Pred == 0) { - std::swap(Op0, Op1); - std::swap(Op0Pred, Op1Pred); - std::swap(Op0Ordered, Op1Ordered); - } - if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq - // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, Op1); - // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false - if (!Op0Ordered) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - // ord && ueq -> ord && (uno || eq) -> oeq - return cast<Instruction>(getFCmpValue(true, Op1Pred, - Op0LHS, Op0RHS, Context)); - } - } - } - } - } + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; } return Changed ? &I : 0; @@ -4450,7 +4484,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { /// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then /// we can simplify this expression to "cond ? C : D or B". static Instruction *MatchSelectFromAndOr(Value *A, Value *B, - Value *C, Value *D) { + Value *C, Value *D, + LLVMContext *Context) { // If A is not a select of -1/0, this cannot match. Value *Cond = 0; if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) @@ -4477,8 +4512,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, ICmpInst::Predicate LHSCC, RHSCC; // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), + m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), + m_ConstantInt(RHSCst)))) return 0; // From here on, we only handle: @@ -4520,18 +4557,16 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, assert(LHSCst != RHSCst && "Compares not folded above?"); switch (LHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst, Context)) { + if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = Context->getConstantExprNeg(LHSCst); - Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, - Val->getName()+".off"); - InsertNewInstBefore(Add, I); - AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst); + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } break; // (X == 13 | X == 15) -> no change @@ -4546,7 +4581,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 @@ -4554,12 +4589,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); } break; case ICmpInst::ICMP_ULT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 @@ -4567,7 +4602,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // this can cause overflow. if (RHSCst->isMaxValue(false)) return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false, I); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change break; @@ -4580,7 +4615,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_SLT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change break; case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 @@ -4588,7 +4623,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // this can cause overflow. if (RHSCst->isMaxValue(true)) return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false, I); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change break; @@ -4601,7 +4636,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_UGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 return ReplaceInstUsesWith(I, LHS); @@ -4609,14 +4644,14 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change break; } break; case ICmpInst::ICMP_SGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 return ReplaceInstUsesWith(I, LHS); @@ -4624,7 +4659,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change break; } @@ -4633,6 +4668,72 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, return 0; } +Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { + if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + + // Otherwise, no need to compare the two constants, compare the + // rest. + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp uno x,x" is "fcmp uno x, 0". + if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && + isa<ConstantAggregateZero>(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, + Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + if (Op0CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, LHS); + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op0Ordered == Op1Ordered) { + // If both are ordered or unordered, return a new fcmp with + // or'ed predicates. + Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, + Op0LHS, Op0RHS, Context); + if (Instruction *I = dyn_cast<Instruction>(RV)) + return I; + // Otherwise, it's a constant boolean value... + return ReplaceInstUsesWith(I, RV); + } + } + return 0; +} + /// FoldOrWithConstants - This helper function folds: /// /// ((A | B) & C1) | (B & C2) @@ -4655,8 +4756,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return 0; if (V1 == A || V1 == B) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I); + Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -4668,7 +4768,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op1)) // X | undef -> -1 - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // or X, X = X if (Op0 == Op1) @@ -4691,21 +4791,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) - if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { - Instruction *Or = BinaryOperator::CreateOr(X, RHS); - InsertNewInstBefore(Or, I); + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); return BinaryOperator::CreateAnd(Or, - Context->getConstantInt(RHS->getValue() | C1->getValue())); + ConstantInt::get(*Context, RHS->getValue() | C1->getValue())); } // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { - Instruction *Or = BinaryOperator::CreateOr(X, RHS); - InsertNewInstBefore(Or, I); + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); return BinaryOperator::CreateXor(Or, - Context->getConstantInt(C1->getValue() & ~RHS->getValue())); + ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue())); } // Try to fold constant and into select arguments. @@ -4738,19 +4838,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (X^C)|Y -> (X|Y)^C iff Y&C == 0 - if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && MaskedValueIsZero(Op1, C1->getValue())) { - Instruction *NOr = BinaryOperator::CreateOr(A, Op1); - InsertNewInstBefore(NOr, I); + Value *NOr = Builder->CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, C1); } // Y|(X^C) -> (X|Y)^C iff Y&C == 0 - if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && MaskedValueIsZero(Op0, C1->getValue())) { - Instruction *NOr = BinaryOperator::CreateOr(A, Op0); - InsertNewInstBefore(NOr, I); + Value *NOr = Builder->CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, C1); } @@ -4801,20 +4901,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { V1 = C, V2 = A, V3 = B; if (V1) { - Value *Or = - InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I); + Value *Or = Builder->CreateOr(V2, V3, "tmp"); return BinaryOperator::CreateAnd(V1, Or); } } // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context)) return Match; // ((A&~B)|(~A&B)) -> A^B @@ -4841,10 +4940,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0), - SI1->getOperand(0), - SI0->getName()), I); + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -4865,26 +4962,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1 if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); } else { A = 0; } // Note, A is still live here! if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B if (Op0 == B) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (~A | ~B) == (~(A & B)) - De Morgan's Law if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B, - I.getName()+".demorgan"), I); + Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan"); return BinaryOperator::CreateNot(And); } } // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) @@ -4899,17 +4995,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (!isa<ICmpInst>(Op0C->getOperand(0)) || !isa<ICmpInst>(Op1C->getOperand(0))) { const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && // Only do this if the casts both really cause code to be // generated. ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -4919,61 +5014,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) { - if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { - if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // true. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - - // Otherwise, no need to compare the two constants, compare the - // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0), - RHS->getOperand(0)); - } - } else { - Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; - FCmpInst::Predicate Op0CC, Op1CC; - if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && - match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - else if (Op0CC == FCmpInst::FCMP_TRUE || - Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - else if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Op1); - else if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Op0); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op0Ordered == Op1Ordered) { - // If both are ordered or unordered, return a new fcmp with - // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, - Op0LHS, Op0RHS, Context); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); - } - } - } - } - } + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; } return Changed ? &I : 0; @@ -5001,14 +5044,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (isa<UndefValue>(Op0)) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef } // xor X, X = 0, even if X is nested in a sequence of Xor's. - if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) { + if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // See if we can simplify any instructions used by the instruction whose sole @@ -5020,22 +5063,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I, Context)) { + if (Value *NotOp = dyn_castNotVal(&I)) { // ~(~X & Y) --> (X | ~Y) - De Morgan's Law // ~(~X | Y) === (X & ~Y) - De Morgan's Law if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { if (Op0I->getOpcode() == Instruction::And || Op0I->getOpcode() == Instruction::Or) { - if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) { - Instruction *NotY = - BinaryOperator::CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - InsertNewInstBefore(NotY, I); + if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(Op0NotVal, NotY); - else - return BinaryOperator::CreateAnd(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); } } } @@ -5043,7 +5084,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) { + if (RHS->isOne() && Op0->hasOneUse()) { // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) return new ICmpInst(ICI->getInversePredicate(), @@ -5059,16 +5100,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); - if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) { - if (RHS == Context->getConstantExprCast(Opcode, - Context->getConstantIntTrue(), - Op0C->getDestTy())) { - Instruction *NewCI = InsertNewInstBefore(CmpInst::Create( - CI->getOpcode(), CI->getInversePredicate(), - CI->getOperand(0), CI->getOperand(1)), I); - NewCI->takeName(CI); - return CastInst::Create(Opcode, NewCI, Op0C->getType()); - } + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + (RHS == ConstantExpr::getCast(Opcode, + ConstantInt::getTrue(*Context), + Op0C->getDestTy()))) { + CI->setPredicate(CI->getInversePredicate()); + return CastInst::Create(Opcode, CI, Op0C->getType()); } } } @@ -5078,9 +5115,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(c-X) == X-c-1 == X+(-c-1) if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { - Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C); - Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C, - Context->getConstantInt(I.getType(), 1)); + Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); + Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, + ConstantInt::get(I.getType(), 1)); return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); } @@ -5088,28 +5125,28 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X if (RHS->isAllOnesValue()) { - Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI); + Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub( - Context->getConstantExprSub(NegOp0CI, - Context->getConstantInt(I.getType(), 1)), + ConstantExpr::getSub(NegOp0CI, + ConstantInt::get(I.getType(), 1)), Op0I->getOperand(0)); } else if (RHS->getValue().isSignBit()) { // (X + C) ^ signbit -> (X + C + signbit) - Constant *C = - Context->getConstantInt(RHS->getValue() + Op0CI->getValue()); + Constant *C = ConstantInt::get(*Context, + RHS->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } } else if (Op0I->getOpcode() == Instruction::Or) { // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { - Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS); + Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. - Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS); - NewRHS = Context->getConstantExprAnd(NewRHS, - Context->getConstantExprNot(CommonBits)); - AddToWorkList(Op0I); + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + NewRHS = ConstantExpr::getAnd(NewRHS, + ConstantExpr::getNot(CommonBits)); + Worklist.Add(Op0I); I.setOperand(0, Op0I->getOperand(0)); I.setOperand(1, NewRHS); return &I; @@ -5127,13 +5164,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return NV; } - if (Value *X = dyn_castNotVal(Op0, Context)) // ~A ^ A == -1 + if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 if (X == Op1) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - if (Value *X = dyn_castNotVal(Op1, Context)) // A ^ ~A == -1 + if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 if (X == Op0) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); @@ -5152,7 +5189,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return ReplaceInstUsesWith(I, B); // A^(A^B) == B } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { return ReplaceInstUsesWith(I, A); // A^(B^A) == B - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) Op1I->swapOperands(); std::swap(A, B); @@ -5167,26 +5205,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); if (Op0I) { Value *A, *B; - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) { + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); - if (B == Op1) { // (A|B)^B == A & ~B - Instruction *NotB = - InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I); - return BinaryOperator::CreateAnd(A, NotB); - } + if (B == Op1) // (A|B)^B == A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { return ReplaceInstUsesWith(I, B); // (A^B)^A == B } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { return ReplaceInstUsesWith(I, A); // (B^A)^A == B - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); if (B == Op1 && // (B&A)^A == ~B & A !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C - Instruction *N = - InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I); - return BinaryOperator::CreateAnd(N, Op1); + return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); } } } @@ -5196,10 +5231,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Op0I->getOpcode() == Op1I->getOpcode() && Op0I->getOperand(1) == Op1I->getOperand(1) && (Op1I->hasOneUse() || Op1I->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0), - Op1I->getOperand(0), - Op0I->getName()), I); + Value *NewOp = + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); return BinaryOperator::Create(Op1I->getOpcode(), NewOp, Op1I->getOperand(1)); } @@ -5235,8 +5269,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { X = B, Y = A, Z = C; if (X) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I); + Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); return BinaryOperator::CreateAnd(NewOp, X); } } @@ -5244,7 +5277,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) @@ -5258,10 +5291,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -5271,8 +5302,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } static ConstantInt *ExtractElement(Constant *V, Constant *Idx, - LLVMContext* Context) { - return cast<ConstantInt>(Context->getConstantExprExtractElement(V, Idx)); + LLVMContext *Context) { + return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); } static bool HasAddOverflow(ConstantInt *Result, @@ -5290,13 +5321,13 @@ static bool HasAddOverflow(ConstantInt *Result, /// AddWithOverflow - Compute Result = In1+In2, returning true if the result /// overflowed for this type. static bool AddWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext* Context, + Constant *In2, LLVMContext *Context, bool IsSigned = false) { - Result = Context->getConstantExprAdd(In1, In2); + Result = ConstantExpr::getAdd(In1, In2); if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); + Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); if (HasAddOverflow(ExtractElement(Result, Idx, Context), ExtractElement(In1, Idx, Context), ExtractElement(In2, Idx, Context), @@ -5326,13 +5357,13 @@ static bool HasSubOverflow(ConstantInt *Result, /// SubWithOverflow - Compute Result = In1-In2, returning true if the result /// overflowed for this type. static bool SubWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext* Context, + Constant *In2, LLVMContext *Context, bool IsSigned = false) { - Result = Context->getConstantExprSub(In1, In2); + Result = ConstantExpr::getSub(In1, In2); if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); + Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); if (HasSubOverflow(ExtractElement(Result, Idx, Context), ExtractElement(In1, Idx, Context), ExtractElement(In2, Idx, Context), @@ -5351,11 +5382,10 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1, /// code necessary to compute the offset from the base pointer (without adding /// in the base pointer). Return the result as a signed integer of intptr size. static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { - TargetData &TD = IC.getTargetData(); + TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(); - LLVMContext* Context = IC.getContext(); - Value *Result = Context->getNullValue(IntPtrTy); + const Type *IntPtrTy = TD.getIntPtrType(I.getContext()); + Value *Result = Constant::getNullValue(IntPtrTy); // Build a mask for high order bits. unsigned IntPtrWidth = TD.getPointerSizeInBits(); @@ -5372,74 +5402,49 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { if (const StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - if (ConstantInt *RC = dyn_cast<ConstantInt>(Result)) - Result = - Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size)); - else - Result = IC.InsertNewInstBefore( - BinaryOperator::CreateAdd(Result, - Context->getConstantInt(IntPtrTy, Size), - GEP->getName()+".offs"), I); + Result = IC.Builder->CreateAdd(Result, + ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); continue; } - Constant *Scale = Context->getConstantInt(IntPtrTy, Size); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = - Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = Context->getConstantExprMul(OC, Scale); - if (Constant *RC = dyn_cast<Constant>(Result)) - Result = Context->getConstantExprAdd(RC, Scale); - else { - // Emit an add instruction. - Result = IC.InsertNewInstBefore( - BinaryOperator::CreateAdd(Result, Scale, - GEP->getName()+".offs"), I); - } + ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale); + // Emit an add instruction. + Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } // Convert to correct type. - if (Op->getType() != IntPtrTy) { - if (Constant *OpC = dyn_cast<Constant>(Op)) - Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true); - else - Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy, - true, - Op->getName()+".c"), I); - } + if (Op->getType() != IntPtrTy) + Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { - Constant *Scale = Context->getConstantInt(IntPtrTy, Size); - if (Constant *OpC = dyn_cast<Constant>(Op)) - Op = Context->getConstantExprMul(OpC, Scale); - else // We'll let instcombine(mul) convert this to a shl if possible. - Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale, - GEP->getName()+".idx"), I); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + // We'll let instcombine(mul) convert this to a shl if possible. + Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); } // Emit an add instruction. - if (isa<Constant>(Op) && isa<Constant>(Result)) - Result = Context->getConstantExprAdd(cast<Constant>(Op), - cast<Constant>(Result)); - else - Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result, - GEP->getName()+".offs"), I); + Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); } return Result; } -/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of -/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want -/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be -/// complex, and scales are involved. The above expression would also be legal -/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This -/// later form is less amenable to optimization though, and we are allowed to -/// generate the first by knowing that pointer arithmetic doesn't overflow. +/// EvaluateGEPOffsetExpression - Return a value that can be used to compare +/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we +/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can +/// be complex, and scales are involved. The above expression would also be +/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). +/// This later form is less amenable to optimization though, and we are allowed +/// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. /// static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, InstCombiner &IC) { - TargetData &TD = IC.getTargetData(); + TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); // Check to see if this gep only has a single variable index. If so, and if @@ -5502,8 +5507,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(), - VariableIdx->getNameStart(), &I); + VariableIdx = new TruncInst(VariableIdx, + TD.getIntPtrType(VariableIdx->getContext()), + VariableIdx->getName(), &I); return VariableIdx; } @@ -5523,40 +5529,39 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(); + const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, true /*SExt*/, - VariableIdx->getNameStart(), &I); - Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs); + VariableIdx->getName(), &I); + Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); } /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. -Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, +Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I) { - assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!"); - // Look through bitcasts. if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) RHS = BCI->getOperand(0); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS) { + if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we - // know pointers can't overflow. See if we can output an optimized form. + // know pointers can't overflow since the gep is inbounds. See if we can + // output an optimized form. Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS, I, *this); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Context->getNullValue(Offset->getType())); - } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) { + Constant::getNullValue(Offset->getType())); + } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { // If the base pointers are different, but the indices are the same, just // compare the base pointer. if (PtrBase != GEPRHS->getOperand(0)) { @@ -5572,7 +5577,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, // If all indices are the same, just compare the base pointers. if (IndicesTheSame) - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), GEPLHS->getOperand(0), GEPRHS->getOperand(0)); // Otherwise, the base pointers are different and the indices are @@ -5622,7 +5627,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return ReplaceInstUsesWith(I, // No comparison is needed here. - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1) { @@ -5635,7 +5640,8 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! - if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && + if (TD && + (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS, I, *this); @@ -5680,7 +5686,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, ICmpInst::Predicate Pred; switch (I.getPredicate()) { - default: assert(0 && "Unexpected predicate!"); + default: llvm_unreachable("Unexpected predicate!"); case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_OEQ: Pred = ICmpInst::ICMP_EQ; @@ -5706,9 +5712,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); @@ -5728,8 +5734,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } else { // If the RHS value is > UnsignedMax, fold the comparison. This handles @@ -5740,8 +5746,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } @@ -5753,8 +5759,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } @@ -5763,27 +5769,27 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // casting the FP value to the integer value and back, checking for equality. // Don't do this for zero, because -0.0 is not fractional. Constant *RHSInt = LHSUnsigned - ? Context->getConstantExprFPToUI(RHSC, IntTy) - : Context->getConstantExprFPToSI(RHSC, IntTy); + ? ConstantExpr::getFPToUI(RHSC, IntTy) + : ConstantExpr::getFPToSI(RHSC, IntTy); if (!RHS.isZero()) { bool Equal = LHSUnsigned - ? Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC - : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC; + ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC + : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; if (!Equal) { // If we had a comparison against a fractional value, we have to adjust // the compare predicate and sometimes the value. RHSC is rounded towards // zero at this point. switch (Pred) { - default: assert(0 && "Unexpected integer comparison!"); + default: llvm_unreachable("Unexpected integer comparison!"); case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_ULE: // (float)int <= 4.4 --> int <= 4 // (float)int <= -4.4 --> false if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_SLE: // (float)int <= 4.4 --> int <= 4 @@ -5795,7 +5801,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int < -4.4 --> false // (float)int < 4.4 --> int <= 4 if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); Pred = ICmpInst::ICMP_ULE; break; case ICmpInst::ICMP_SLT: @@ -5808,7 +5814,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int > 4.4 --> int > 4 // (float)int > -4.4 --> true if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); break; case ICmpInst::ICMP_SGT: // (float)int > 4.4 --> int > 4 @@ -5820,7 +5826,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 if (!RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); Pred = ICmpInst::ICMP_UGT; break; case ICmpInst::ICMP_SGE: @@ -5844,22 +5850,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // Fold trivial predicates. if (I.getPredicate() == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); if (I.getPredicate() == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); // Simplify 'fcmp pred X, X' if (Op0 == Op1) { switch (I.getPredicate()) { - default: assert(0 && "Unknown predicate!"); + default: llvm_unreachable("Unknown predicate!"); case FCmpInst::FCMP_UEQ: // True if unordered or equal case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); case FCmpInst::FCMP_OGT: // True if ordered and greater than case FCmpInst::FCMP_OLT: // True if ordered and less than case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) case FCmpInst::FCMP_ULT: // True if unordered or less than @@ -5867,7 +5873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UNE: // True if unordered or not equal // Canonicalize these to be 'fcmp uno %X, 0.0'. I.setPredicate(FCmpInst::FCMP_UNO); - I.setOperand(1, Context->getNullValue(Op0->getType())); + I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; case FCmpInst::FCMP_ORD: // True if ordered (no nans) @@ -5876,13 +5882,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: // True if ordered and less than or equal // Canonicalize these to be 'fcmp ord %X, 0.0'. I.setPredicate(FCmpInst::FCMP_ORD); - I.setOperand(1, Context->getNullValue(Op0->getType())); + I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; } } if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef - return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { @@ -5890,11 +5896,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { if (CFP->getValueAPF().isNaN()) { if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and... - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); assert(FCmpInst::isUnordered(I.getPredicate()) && "Comparison must be either ordered or unordered!"); // True if unordered. - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); } } @@ -5905,7 +5911,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = FoldOpIntoPhi(I, true)) return NV; break; case Instruction::SIToFP: @@ -5921,18 +5927,16 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (LHSI->hasOneUse()) { if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { // Fold the known value into the constant operand. - Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); + Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. - Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), - LHSI->getOperand(2), RHSC, - I.getName()), I); + Op2 = Builder->CreateFCmp(I.getPredicate(), + LHSI->getOperand(2), RHSC, I.getName()); } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { // Fold the known value into the constant operand. - Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); + Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. - Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), - LHSI->getOperand(1), RHSC, - I.getName()), I); + Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); } } @@ -5952,28 +5956,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // icmp X, X if (Op0 == Op1) - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), I.isTrueWhenEqual())); if (isa<UndefValue>(Op1)) // X icmp undef -> undef - return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. - if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || + if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || isa<ConstantPointerNull>(Op0)) && - (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || + (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || isa<ConstantPointerNull>(Op1))) - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, + return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), !I.isTrueWhenEqual())); // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::Int1Ty) { + if (Ty == Type::getInt1Ty(*Context)) { switch (I.getPredicate()) { - default: assert(0 && "Invalid icmp instruction!"); + default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp"); - InsertNewInstBefore(Xor, I); + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); return BinaryOperator::CreateNot(Xor); } case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B @@ -5983,32 +5986,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { std::swap(Op0, Op1); // Change icmp ugt -> icmp ult // FALL THROUGH case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op1); } case ICmpInst::ICMP_SGT: std::swap(Op0, Op1); // Change icmp sgt -> icmp slt // FALL THROUGH case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op0); } case ICmpInst::ICMP_UGE: std::swap(Op0, Op1); // Change icmp uge -> icmp ule // FALL THROUGH case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op1); } case ICmpInst::ICMP_SGE: std::swap(Op0, Op1); // Change icmp sge -> icmp sle // FALL THROUGH case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -6040,20 +6039,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { default: break; case ICmpInst::ICMP_ULE: if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_ULT, Op0, + AddOne(CI)); case ICmpInst::ICMP_SLE: if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + AddOne(CI)); case ICmpInst::ICMP_UGE: if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_UGT, Op0, + SubOne(CI)); case ICmpInst::ICMP_SGE: if (CI->isMinValue(true)) // A >=s MIN -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + SubOne(CI)); } // If this comparison is a normal comparison, it demands all @@ -6100,110 +6103,114 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. if (!isa<Constant>(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - Context->getConstantInt(Op0Min), Op1); + ConstantInt::get(*Context, Op0Min), Op1); if (!isa<Constant>(Op1) && Op1Min == Op1Max) - return new ICmpInst(I.getPredicate(), Op0, - Context->getConstantInt(Op1Min)); + return new ICmpInst(I.getPredicate(), Op0, + ConstantInt::get(*Context, Op1Min)); // Based on the range information we know about the LHS, see if we can // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { - default: assert(0 && "Unknown icmp opcode!"); + default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_NE: if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); break; case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + SubOne(CI)); // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear if (CI->isMinValue(true)) return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - Context->getConstantIntAllOnesValue(Op0->getType())); + Constant::getAllOnesValue(Op0->getType())); } break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + AddOne(CI)); // (x >u 2147483647) -> (x <s 0) -> true if sign bit set if (CI->isMaxValue(true)) return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Context->getNullValue(Op0->getType())); + Constant::getNullValue(Op0->getType())); } break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + SubOne(CI)); } break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + AddOne(CI)); } break; case ICmpInst::ICMP_SGE: assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_SLE: assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_UGE: assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_ULE: assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; } @@ -6255,16 +6262,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } if (isAllZeros) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Context->getNullValue(LHSI->getOperand(0)->getType())); + Constant::getNullValue(LHSI->getOperand(0)->getType())); } break; case Instruction::PHI: - // Only fold icmp into the PHI if the phi and fcmp are in the same + // Only fold icmp into the PHI if the phi and icmp are in the same // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = FoldOpIntoPhi(I, true)) return NV; break; case Instruction::Select: { @@ -6275,18 +6282,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (LHSI->hasOneUse()) { if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { // Fold the known value into the constant operand. - Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); // Insert a new ICmp of the other select operand. - Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(2), RHSC, - I.getName()), I); + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { // Fold the known value into the constant operand. - Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); // Insert a new ICmp of the other select operand. - Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(1), RHSC, - I.getName()), I); + Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); } } @@ -6298,19 +6303,31 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If we have (malloc != null), and if the malloc has a single use, we // can assume it is successful and remove the malloc. if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - AddToWorkList(LHSI); - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, - !I.isTrueWhenEqual())); + Worklist.Add(LHSI); + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(*Context), + !I.isTrueWhenEqual())); + } + break; + case Instruction::Call: + // If we have (malloc != null), and if the malloc has a single use, we + // can assume it is successful and remove the malloc. + if (isMalloc(LHSI) && LHSI->hasOneUse() && + isa<ConstantPointerNull>(RHSC)) { + Worklist.Add(LHSI); + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(*Context), + !I.isTrueWhenEqual())); } break; } } // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. - if (User *GEP = dyn_castGetElementPtr(Op0)) + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) return NI; - if (User *GEP = dyn_castGetElementPtr(Op1)) + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) if (Instruction *NI = FoldGEPICmp(GEP, Op0, ICmpInst::getSwappedPredicate(I.getPredicate()), I)) return NI; @@ -6333,10 +6350,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If Op1 is a constant, we can fold the cast into the constant. if (Op0->getType() != Op1->getType()) { if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType()); + Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); } else { // Otherwise, cast the RHS right before the icmp - Op1 = InsertBitCastBefore(Op1, Op0->getType(), I); + Op1 = Builder->CreateBitCast(Op1, Op0->getType()); } } return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -6397,16 +6414,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Mask = -1 >> count-trailing-zeros(Cst). if (!CI->isZero() && !CI->isOne()) { const APInt &AP = CI->getValue(); - ConstantInt *Mask = Context->getConstantInt( + ConstantInt *Mask = ConstantInt::get(*Context, APInt::getLowBitsSet(AP.getBitWidth(), AP.getBitWidth() - AP.countTrailingZeros())); - Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0), - Mask); - Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0), - Mask); - InsertNewInstBefore(And1, I); - InsertNewInstBefore(And2, I); + Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); return new ICmpInst(I.getPredicate(), And1, And2); } } @@ -6435,7 +6448,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; return new ICmpInst(I.getPredicate(), OtherVal, - Context->getNullValue(A->getType())); + Constant::getNullValue(A->getType())); } if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { @@ -6444,10 +6457,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { Constant *NC = - Context->getConstantInt(C1->getValue() ^ C2->getValue()); - Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp"); - return new ICmpInst(I.getPredicate(), A, - InsertNewInstBefore(Xor, I)); + ConstantInt::get(*Context, C1->getValue() ^ C2->getValue()); + Value *Xor = Builder->CreateXor(C, NC, "tmp"); + return new ICmpInst(I.getPredicate(), A, Xor); } // A^B == A^D -> B == D @@ -6463,18 +6475,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // A == (A^B) -> B == 0 Value *OtherVal = A == Op0 ? B : A; return new ICmpInst(I.getPredicate(), OtherVal, - Context->getNullValue(A->getType())); + Constant::getNullValue(A->getType())); } // (A-B) == A -> B == 0 if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) return new ICmpInst(I.getPredicate(), B, - Context->getNullValue(B->getType())); + Constant::getNullValue(B->getType())); // A == (A-B) -> B == 0 if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) return new ICmpInst(I.getPredicate(), B, - Context->getNullValue(B->getType())); + Constant::getNullValue(B->getType())); // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (Op0->hasOneUse() && Op1->hasOneUse() && @@ -6493,10 +6505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I); - Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I); + Op1 = Builder->CreateXor(X, Y, "tmp"); + Op1 = Builder->CreateAnd(Op1, Z, "tmp"); I.setOperand(0, Op1); - I.setOperand(1, Context->getNullValue(Op1->getType())); + I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; } } @@ -6535,13 +6547,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and // C2 (CI). By solving for X we can turn this into a range check // instead of computing a divide. - Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS); + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) : - Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS; + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); @@ -6565,8 +6577,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast<ConstantInt>(Context->getConstantExprNeg(SubOne(DivRHS, - Context))); + LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); HiBound = DivRHS; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) @@ -6575,11 +6586,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) - HiBound = AddOne(Prod, Context); + HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { ConstantInt* DivNeg = - cast<ConstantInt>(Context->getConstantExprNeg(DivRHS)); + cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context, true) ? -1 : 0; } @@ -6587,15 +6598,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS, Context); - HiBound = cast<ConstantInt>(Context->getConstantExprNeg(DivRHS)); + LoBound = AddOne(DivRHS); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) - HiBound = AddOne(Prod, Context); + HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) LoOverflow = AddWithOverflow(LoBound, HiBound, @@ -6613,42 +6624,42 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, Value *X = DivI->getOperand(0); switch (Pred) { - default: assert(0 && "Unhandled icmp opcode!"); + default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); else return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); else return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); if (LoOverflow == -1) // Low bound is less than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); else if (HiOverflow == -1) // High bound less than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); else @@ -6682,7 +6693,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, NewRHS.zext(SrcBits); NewRHS |= KnownOne; return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - Context->getConstantInt(NewRHS)); + ConstantInt::get(*Context, NewRHS)); } } break; @@ -6699,7 +6710,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // the operation, just stop using the Xor. if (!XorCST->getValue().isNegative()) { ICI.setOperand(0, CompareVal); - AddToWorkList(LHSI); + Worklist.Add(LHSI); return &ICI; } @@ -6711,10 +6722,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, - SubOne(RHS, Context)); + SubOne(RHS)); else return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, - AddOne(RHS, Context)); + AddOne(RHS)); } if (LHSI->hasOneUse()) { @@ -6725,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ? ICI.getUnsignedPredicate() : ICI.getSignedPredicate(); return new ICmpInst(Pred, LHSI->getOperand(0), - Context->getConstantInt(RHSV ^ SignBit)); + ConstantInt::get(*Context, RHSV ^ SignBit)); } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) @@ -6736,7 +6747,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, : ICI.getSignedPredicate(); Pred = ICI.getSwappedPredicate(Pred); return new ICmpInst(Pred, LHSI->getOperand(0), - Context->getConstantInt(RHSV ^ NotSignBit)); + ConstantInt::get(*Context, RHSV ^ NotSignBit)); } } } @@ -6763,12 +6774,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, NewCST.zext(BitWidth); APInt NewCI = RHSV; NewCI.zext(BitWidth); - Instruction *NewAnd = - BinaryOperator::CreateAnd(Cast->getOperand(0), - Context->getConstantInt(NewCST),LHSI->getName()); - InsertNewInstBefore(NewAnd, ICI); + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantInt::get(*Context, NewCST), LHSI->getName()); return new ICmpInst(ICI.getPredicate(), NewAnd, - Context->getConstantInt(NewCI)); + ConstantInt::get(*Context, NewCI)); } } @@ -6805,32 +6815,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) - NewCst = Context->getConstantExprLShr(RHS, ShAmt); + NewCst = ConstantExpr::getLShr(RHS, ShAmt); else - NewCst = Context->getConstantExprShl(RHS, ShAmt); + NewCst = ConstantExpr::getShl(RHS, ShAmt); // Check to see if we are shifting out any of the bits being // compared. - if (Context->getConstantExpr(Shift->getOpcode(), + if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) { // If we shifted bits out, the fold is not going to work out. // As a special case, check to see if this means that the // result is always true or false now. if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); } else { ICI.setOperand(1, NewCst); Constant *NewAndCST; if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt); + NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); else - NewAndCST = Context->getConstantExprShl(AndCST, ShAmt); + NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); LHSI->setOperand(1, NewAndCST); LHSI->setOperand(0, Shift->getOperand(0)); - AddToWorkList(Shift); // Shift is dead. - AddUsesToWorkList(ICI); + Worklist.Add(Shift); // Shift is dead. return &ICI; } } @@ -6845,19 +6854,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Compute C << Y. Value *NS; if (Shift->getOpcode() == Instruction::LShr) { - NS = BinaryOperator::CreateShl(AndCST, - Shift->getOperand(1), "tmp"); + NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); } else { // Insert a logical shift. - NS = BinaryOperator::CreateLShr(AndCST, - Shift->getOperand(1), "tmp"); + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); } - InsertNewInstBefore(cast<Instruction>(NS), ICI); // Compute X & (C << Y). - Instruction *NewAnd = - BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - InsertNewInstBefore(NewAnd, ICI); + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); ICI.setOperand(0, NewAnd); return &ICI; @@ -6881,11 +6886,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If we are comparing against bits always shifted out, the // comparison cannot succeed. Constant *Comp = - Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt), + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt); if (Comp != RHS) {// Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); + Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } @@ -6893,15 +6898,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - Context->getConstantInt(APInt::getLowBitsSet(TypeBits, + ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); + Value *And = + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - Context->getConstantInt(RHSV.lshr(ShAmtVal))); + ConstantInt::get(*Context, RHSV.lshr(ShAmtVal))); } } @@ -6910,15 +6913,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) << + Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) << (TypeBits-ShAmt->getZExtValue()-1)); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); - + Value *And = + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, - And, Context->getNullValue(And->getType())); + And, Constant::getNullValue(And->getType())); } break; } @@ -6948,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (Comp != RHSV) { // Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); + Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } @@ -6959,20 +6959,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, MaskedValueIsZero(LHSI->getOperand(0), APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - Context->getConstantExprShl(RHS, ShAmt)); + ConstantExpr::getShl(RHS, ShAmt)); } if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = Context->getConstantInt(Val); + Constant *Mask = ConstantInt::get(*Context, Val); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); + Value *And = Builder->CreateAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - Context->getConstantExprShl(RHS, ShAmt)); + ConstantExpr::getShl(RHS, ShAmt)); } break; } @@ -7005,18 +7003,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ICI.isSignedPredicate()) { if (CR.getLower().isSignBit()) { return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), - Context->getConstantInt(CR.getUpper())); + ConstantInt::get(*Context, CR.getUpper())); } else if (CR.getUpper().isSignBit()) { return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), - Context->getConstantInt(CR.getLower())); + ConstantInt::get(*Context, CR.getLower())); } } else { if (CR.getLower().isMinValue()) { return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), - Context->getConstantInt(CR.getUpper())); + ConstantInt::get(*Context, CR.getUpper())); } else if (CR.getUpper().isMinValue()) { return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), - Context->getConstantInt(CR.getLower())); + ConstantInt::get(*Context, CR.getLower())); } } } @@ -7036,12 +7034,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { - Instruction *NewRem = - BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1), - BO->getName()); - InsertNewInstBefore(NewRem, ICI); - return new ICmpInst(ICI.getPredicate(), NewRem, - Context->getNullValue(BO->getType())); + Value *NewRem = + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); + return new ICmpInst(ICI.getPredicate(), NewRem, + Constant::getNullValue(BO->getType())); } } break; @@ -7050,19 +7047,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { if (BO->hasOneUse()) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Context->getConstantExprSub(RHS, BOp1C)); + ConstantExpr::getSub(RHS, BOp1C)); } else if (RHSV == 0) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - if (Value *NegVal = dyn_castNegVal(BOp1, Context)) + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0, Context)) + else if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); else if (BO->hasOneUse()) { - Instruction *Neg = BinaryOperator::CreateNeg(BOp1); - InsertNewInstBefore(Neg, ICI); + Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); } @@ -7073,7 +7069,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // the explicit xor. if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Context->getConstantExprXor(RHS, BOC)); + ConstantExpr::getXor(RHS, BOC)); // FALLTHROUGH case Instruction::Sub: @@ -7087,10 +7083,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { - Constant *NotCI = Context->getConstantExprNot(RHS); - if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue()) + Constant *NotCI = ConstantExpr::getNot(RHS); + if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), isICMP_NE)); } break; @@ -7101,19 +7097,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // comparison can never succeed! if ((RHSV & ~BOC->getValue()) != 0) return ReplaceInstUsesWith(ICI, - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), isICMP_NE)); // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, - Context->getNullValue(RHS->getType())); + Constant::getNullValue(RHS->getType())); // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); - Constant *Zero = Context->getNullValue(X->getType()); + Constant *Zero = Constant::getNullValue(X->getType()); ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); @@ -7122,7 +7118,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); - Constant *NegX = Context->getConstantExprNeg(BOC); + Constant *NegX = ConstantExpr::getNeg(BOC); ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); @@ -7133,9 +7129,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { // Handle icmp {eq|ne} <intrinsic>, intcst. if (II->getIntrinsicID() == Intrinsic::bswap) { - AddToWorkList(II); + Worklist.Add(II); ICI.setOperand(0, II->getOperand(1)); - ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap())); + ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap())); return &ICI; } } @@ -7155,17 +7151,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. - if (LHSCI->getOpcode() == Instruction::PtrToInt && - getTargetData().getPointerSizeInBits() == + if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && + TD->getPointerSizeInBits() == cast<IntegerType>(DestTy)->getBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { - RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy); + RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI); + RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); } if (RHSOp) @@ -7212,8 +7208,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Compute the constant that would happen if we truncated to SrcTy then // reextended to DestTy. - Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy); - Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(), + Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); + Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy); // If the re-extended constant didn't change... @@ -7239,9 +7235,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // First, handle some easy cases. We know the result cannot be equal at this // point so handle the ICI.isEquality() cases if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. @@ -7249,20 +7245,19 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (isSignedCmp) { // We're performing a signed comparison. if (cast<ConstantInt>(CI)->getValue().isNegative()) - Result = Context->getConstantIntFalse(); // X < (small) --> false + Result = ConstantInt::getFalse(*Context); // X < (small) --> false else - Result = Context->getConstantIntTrue(); // X < (large) --> true + Result = ConstantInt::getTrue(*Context); // X < (large) --> true } else { // We're performing an unsigned comparison. if (isSignedExt) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy); - Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp, - NegOne, ICI.getName()), ICI); + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); } else { // Unsigned extend & unsigned compare -> always true. - Result = Context->getConstantIntTrue(); + Result = ConstantInt::getTrue(*Context); } } @@ -7275,7 +7270,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { ICI.getPredicate()==ICmpInst::ICMP_SGT) && "ICmp should be folded!"); if (Constant *CI = dyn_cast<Constant>(Result)) - return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI)); + return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); return BinaryOperator::CreateNot(Result); } @@ -7317,21 +7312,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { // shl X, 0 == X and shr X, 0 == X // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Context->getNullValue(Op1->getType()) || - Op0 == Context->getNullValue(Op0->getType())) + if (Op1 == Constant::getNullValue(Op1->getType()) || + Op0 == Constant::getNullValue(Op0->getType())) return ReplaceInstUsesWith(I, Op0); if (isa<UndefValue>(Op0)) { if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef return ReplaceInstUsesWith(I, Op0); else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } if (isa<UndefValue>(Op1)) { if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X return ReplaceInstUsesWith(I, Op0); else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // See if we can fold away this shift. @@ -7363,9 +7358,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); else { - I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1)); + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } } @@ -7375,7 +7370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), - Context->getConstantExprShl(BOOp, Op1)); + ConstantExpr::getShl(BOOp, Op1)); // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) @@ -7396,10 +7391,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType()); - Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt, - I.getName()); - InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2) + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. Emulate this by inserting an AND to @@ -7420,10 +7414,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, MaskV = MaskV.lshr(Op1->getZExtValue()); } - Instruction *And = - BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV), - TI->getName()); - InsertNewInstBefore(And, I); // shift1 & 0x00FF + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV), + TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); @@ -7444,17 +7437,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // These operators commute. // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && - match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){ - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(0), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = - BinaryOperator::Create(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); - InsertNewInstBefore(X, I); // (X + (Y << C)) + match(Op0BO->getOperand(1), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, Context->getConstantInt( + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } @@ -7465,16 +7456,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, m_And(m_Shr(m_Value(V1), m_Specific(Op1)), m_ConstantInt(CC))) && cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(0), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *XM = - BinaryOperator::CreateAnd(V1, - Context->getConstantExprShl(CC, Op1), - V1->getName()+".mask"); - InsertNewInstBefore(XM, I); // X & (CC << C) - + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } @@ -7483,17 +7470,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, case Instruction::Sub: { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && - match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){ - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(1), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = - BinaryOperator::Create(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); - InsertNewInstBefore(X, I); // (X + (Y << C)) + match(Op0BO->getOperand(0), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, Context->getConstantInt( + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } @@ -7504,15 +7489,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, m_ConstantInt(CC))) && V2 == Op1 && cast<BinaryOperator>(Op0BO->getOperand(0)) ->getOperand(0)->hasOneUse()) { - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(1), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *XM = - BinaryOperator::CreateAnd(V1, - Context->getConstantExprShl(CC, Op1), - V1->getName()+".mask"); - InsertNewInstBefore(XM, I); // X & (CC << C) + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -7552,11 +7533,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, isValid = Op0C->getValue()[TypeBits-1] == highBitSet; if (isValid) { - Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1); + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - Instruction *NewShift = - BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1); - InsertNewInstBefore(NewShift, I); + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -7589,31 +7569,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // saturates. if (AmtSum >= TypeBits) { if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } return BinaryOperator::Create(I.getOpcode(), X, - Context->getConstantInt(Ty, AmtSum)); - } else if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { + ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::LShr && + I.getOpcode() == Instruction::AShr) { if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum)); - } else if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::AShr && + I.getOpcode() == Instruction::LShr) { // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. if (AmtSum >= TypeBits) AmtSum = TypeBits-1; - Instruction *Shift = - BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask)); } // Okay, if we get here, one shift must be left, and the other shift must be @@ -7622,12 +7604,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // If we have ((X >>? C) << C), turn this into X & (-1 << C). if (I.getOpcode() == Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); } // We can simplify ((X << C) >>s C) into a trunc + sext. // NOTE: we could do this for any C, but that would make 'unusual' integer @@ -7641,15 +7623,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, case 32 : case 64 : case 128: - SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1); + SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1); break; default: break; } - if (SExtType) { - Instruction *NewTrunc = new TruncInst(X, SExtType, "sext"); - InsertNewInstBefore(NewTrunc, I); - return new SExtInst(NewTrunc, Ty); - } + if (SExtType) + return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); // Otherwise, we can't handle it yet. } else if (ShiftAmt1 < ShiftAmt2) { uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; @@ -7658,23 +7637,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (I.getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Instruction *Shift = - BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr) { assert(ShiftOp->getOpcode() == Instruction::Shl); - Instruction *Shift = - BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. @@ -7686,24 +7663,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (I.getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Instruction *Shift = - BinaryOperator::Create(ShiftOp->getOpcode(), X, - Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, + ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr) { assert(ShiftOp->getOpcode() == Instruction::Shl); - Instruction *Shift = - BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. @@ -7718,12 +7693,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, /// X*Scale+Offset. /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset, LLVMContext* Context) { - assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!"); + int &Offset, LLVMContext *Context) { + assert(Val->getType() == Type::getInt32Ty(*Context) && + "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; - return Context->getConstantInt(Type::Int32Ty, 0); + return ConstantInt::get(Type::getInt32Ty(*Context), 0); } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { @@ -7763,6 +7739,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI) { const PointerType *PTy = cast<PointerType>(CI.getType()); + BuilderTy AllocaBuilder(*Builder); + AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + // Remove any uses of AI that are dead. assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); @@ -7773,11 +7752,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, ++UI; // If this instruction uses AI more than once, don't break UI. ++NumDeadInst; - DOUT << "IC: DCE: " << *User; + DEBUG(errs() << "IC: DCE: " << *User << '\n'); EraseInstFromFunction(*User); } } - + + // This requires TargetData to get the alloca alignment and size information. + if (!TD) return 0; + // Get the type really allocated and the type casted to. const Type *AllocElTy = AI.getAllocatedType(); const Type *CastElTy = PTy->getElementType(); @@ -7816,30 +7798,22 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (Scale == 1) { Amt = NumElements; } else { - // If the allocation size is constant, form a constant mul expression - Amt = Context->getConstantInt(Type::Int32Ty, Scale); - if (isa<ConstantInt>(NumElements)) - Amt = Context->getConstantExprMul(cast<ConstantInt>(NumElements), - cast<ConstantInt>(Amt)); - // otherwise multiply the amount and the number of elements - else { - Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp"); - Amt = InsertNewInstBefore(Tmp, AI); - } + Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale); + // Insert before the alloca, not before the cast. + Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); } if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true); - Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp"); - Amt = InsertNewInstBefore(Tmp, AI); + Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true); + Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); } AllocationInst *New; if (isa<MallocInst>(AI)) - New = new MallocInst(CastElTy, Amt, AI.getAlignment()); + New = AllocaBuilder.CreateMalloc(CastElTy, Amt); else - New = new AllocaInst(CastElTy, Amt, AI.getAlignment()); - InsertNewInstBefore(New, AI); + New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + New->setAlignment(AI.getAlignment()); New->takeName(&AI); // If the allocation has one real use plus a dbg.declare, just remove the @@ -7851,11 +7825,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // things that used it to use the new cast. This will also hack on CI, but it // will die soon. else if (!AI.hasOneUse()) { - AddUsesToWorkList(AI); // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. - CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast"); - InsertNewInstBefore(NewCast, AI); + Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); AI.replaceAllUsesWith(NewCast); } return ReplaceInstUsesWith(CI, New); @@ -7923,6 +7895,23 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, NumCastsRemoved); + case Instruction::UDiv: + case Instruction::URem: { + // UDiv and URem can be truncated if all the truncated bits are zero. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigBitWidth) { + APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); + if (MaskedValueIsZero(I->getOperand(0), Mask) && + MaskedValueIsZero(I->getOperand(1), Mask)) { + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved) && + CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, + NumCastsRemoved); + } + } + break; + } case Instruction::Shl: // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. @@ -7993,7 +7982,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) - return Context->getConstantExprIntegerCast(C, Ty, + return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // Otherwise, it must be an instruction. @@ -8009,7 +7998,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, case Instruction::Xor: case Instruction::AShr: case Instruction::LShr: - case Instruction::Shl: { + case Instruction::Shl: + case Instruction::UDiv: + case Instruction::URem: { Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -8046,7 +8037,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, } default: // TODO: Can handle more cases here. - assert(0 && "Unreachable!"); + llvm_unreachable("Unreachable!"); break; } @@ -8089,13 +8080,14 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices, const TargetData *TD, - LLVMContext* Context) { + LLVMContext *Context) { + if (!TD) return 0; if (!Ty->isSized()) return 0; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(*Context); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -8110,7 +8102,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx)); + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); // Index into the types. If we fail, set OrigBase to null. while (Offset) { @@ -8124,14 +8116,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, "Offset must stay within the indexed type"); unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt)); + NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt)); Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -8154,7 +8146,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. - AddToWorkList(GEP); + Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } @@ -8163,7 +8155,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. - if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) { + if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) { if (GEP->hasAllConstantIndices()) { // We are guaranteed to get a constant from EmitGEPOffset. ConstantInt *OffsetV = @@ -8179,10 +8171,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. - Instruction *NGEP = GetElementPtrInst::Create(OrigBase, - NewIndices.begin(), - NewIndices.end(), ""); - InsertNewInstBefore(NGEP, CI); + Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(OrigBase, + NewIndices.begin(), NewIndices.end()) : + Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -8214,10 +8206,8 @@ static bool isSafeIntegerType(const Type *Ty) { } } -/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as -/// integer types. This function implements the common transforms for all those -/// cases. -/// @brief Implement the transforms common to CastInst with integer operands +/// commonIntCastTransforms - This function implements the common transforms +/// for trunc, zext, and sext. Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; @@ -8241,11 +8231,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Attempt to propagate the cast into the instruction for int->int casts. int NumCastsRemoved = 0; - if (!isa<BitCastInst>(CI) && - // Only do this if the dest type is a simple type, don't convert the - // expression tree to something weird like i93 unless the source is also - // strange. - (isSafeIntegerType(DestTy->getScalarType()) || + // Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + if ((isSafeIntegerType(DestTy->getScalarType()) || !isSafeIntegerType(SrcI->getType()->getScalarType())) && CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { @@ -8261,7 +8250,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { default: // All the others use floating point so we shouldn't actually // get here because of the check above. - assert(0 && "Unknown cast type"); + llvm_unreachable("Unknown cast type"); case Instruction::Trunc: DoXForm = true; break; @@ -8307,8 +8296,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { } if (DoXForm) { - DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid" - << " cast: " << CI; + DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid cast: " << CI); Value *Res = EvaluateInDifferentType(SrcI, DestTy, CI.getOpcode() == Instruction::SExt); if (JustReplace) @@ -8317,9 +8306,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { assert(Res->getType() == DestTy); switch (CI.getOpcode()) { - default: assert(0 && "Unknown cast type!"); + default: llvm_unreachable("Unknown cast type!"); case Instruction::Trunc: - case Instruction::BitCast: // Just replace this cast with the result. return ReplaceInstUsesWith(CI, Res); case Instruction::ZExt: { @@ -8332,8 +8320,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { return ReplaceInstUsesWith(CI, Res); // We need to emit an AND to clear the high bits. - Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize, - SrcBitSize)); + Constant *C = ConstantInt::get(*Context, + APInt::getLowBitsSet(DestBitSize, SrcBitSize)); return BinaryOperator::CreateAnd(Res, C); } case Instruction::SExt: { @@ -8344,9 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { return ReplaceInstUsesWith(CI, Res); // We need to emit a cast to truncate, then a cast to sext. - return CastInst::Create(Instruction::SExt, - InsertCastBefore(Instruction::Trunc, Res, Src->getType(), - CI), DestTy); + return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy); } } } @@ -8362,16 +8348,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::Or: case Instruction::Xor: // If we are discarding information, rewrite. - if (DestBitSize <= SrcBitSize && DestBitSize != 1) { - // Don't insert two casts if they cannot be eliminated. We allow - // two casts to be inserted if the sizes are the same. This could - // only be converting signedness, which is a noop. - if (DestBitSize == SrcBitSize || - !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) || + if (DestBitSize < SrcBitSize && DestBitSize != 1) { + // Don't insert two casts unless at least one can be eliminated. + if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Instruction::CastOps opcode = CI.getOpcode(); - Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); + Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); + Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); return BinaryOperator::Create( cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c); } @@ -8380,62 +8362,25 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 if (isa<ZExtInst>(CI) && SrcBitSize == 1 && SrcI->getOpcode() == Instruction::Xor && - Op1 == Context->getConstantIntTrue() && + Op1 == ConstantInt::getTrue(*Context) && (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) { - Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI); + Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName()); return BinaryOperator::CreateXor(New, - Context->getConstantInt(CI.getType(), 1)); - } - break; - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::SRem: - case Instruction::URem: - // If we are just changing the sign, rewrite. - if (DestBitSize == SrcBitSize) { - // Don't insert two casts if they cannot be eliminated. We allow - // two casts to be inserted if the sizes are the same. This could - // only be converting signedness, which is a noop. - if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || - !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Value *Op0c = InsertCastBefore(Instruction::BitCast, - Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(Instruction::BitCast, - Op1, DestTy, *SrcI); - return BinaryOperator::Create( - cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c); - } + ConstantInt::get(CI.getType(), 1)); } break; - case Instruction::Shl: - // Allow changing the sign of the source operand. Do not allow - // changing the size of the shift, UNLESS the shift amount is a - // constant. We must not change variable sized shifts to a smaller - // size, because it is undefined to shift more bits out than exist - // in the value. - if (DestBitSize == SrcBitSize || - (DestBitSize < SrcBitSize && isa<Constant>(Op1))) { - Instruction::CastOps opcode = (DestBitSize == SrcBitSize ? - Instruction::BitCast : Instruction::Trunc); - Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); + case Instruction::Shl: { + // Canonicalize trunc inside shl, if we can. + ConstantInt *CI = dyn_cast<ConstantInt>(Op1); + if (CI && DestBitSize < SrcBitSize && + CI->getLimitedValue(DestBitSize) < DestBitSize) { + Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); + Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); return BinaryOperator::CreateShl(Op0c, Op1c); } break; - case Instruction::AShr: - // If this is a signed shr, and if all bits shifted in are about to be - // truncated off, turn it into an unsigned shr to allow greater - // simplifications. - if (DestBitSize < SrcBitSize && - isa<ConstantInt>(Op1)) { - uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize); - if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) { - // Insert the new logical shift right. - return BinaryOperator::CreateLShr(Op0, Op1); - } - } - break; + } } return 0; } @@ -8450,11 +8395,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1 && - isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) { - Constant *One = Context->getConstantInt(Src->getType(), 1); - Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI); - Value *Zero = Context->getNullValue(Src->getType()); + if (DestBitWidth == 1) { + Constant *One = ConstantInt::get(Src->getType(), 1); + Src = Builder->CreateAnd(Src, One, "tmp"); + Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -8469,12 +8413,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); if (MaskedValueIsZero(ShiftOp, Mask)) { if (ShAmt >= DestBitWidth) // All zeros. - return ReplaceInstUsesWith(CI, Context->getNullValue(Ty)); + return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty)); // Okay, we can shrink this. Truncate the input, then return a new // shift. - Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI); - Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty); + Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); + Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); return BinaryOperator::CreateLShr(V1, V2); } } @@ -8499,20 +8443,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (!DoXform) return ICI; Value *In = ICI->getOperand(0); - Value *Sh = Context->getConstantInt(In->getType(), + Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, - In->getName()+".lobit"), - CI); + In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) - In = CastInst::CreateIntegerCast(In, CI.getType(), - false/*ZExt*/, "tmp", &CI); + In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { - Constant *One = Context->getConstantInt(In->getType(), 1); - In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One, - In->getName()+".not"), - CI); + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, In->getName()+".not"); } return ReplaceInstUsesWith(CI, In); @@ -8545,8 +8484,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true - Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE); - Res = Context->getConstantExprZExt(Res, CI.getType()); + Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } @@ -8555,15 +8494,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (ShiftAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, - Context->getConstantInt(In->getType(), ShiftAmt), - In->getName()+".lobit"), CI); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. - Constant *One = Context->getConstantInt(In->getType(), 1); - In = BinaryOperator::CreateXor(In, One, "tmp"); - InsertNewInstBefore(cast<Instruction>(In), CI); + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, "tmp"); } if (CI.getType() == In->getType()) @@ -8600,21 +8537,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // SrcSize > DstSize: trunc(a) & mask if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = Context->getConstantInt(A->getType(), AndValue); - Instruction *And = - BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); - InsertNewInstBefore(And, CI); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); - } else if (SrcSize == DstSize) { + } + + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(), + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), AndValue)); - } else if (SrcSize > DstSize) { - Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); - InsertNewInstBefore(Trunc, CI); + } + if (SrcSize > DstSize) { + Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, - Context->getConstantInt(Trunc->getType(), + ConstantInt::get(Trunc->getType(), AndValue)); } } @@ -8631,8 +8568,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { - Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI); - Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI); + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); return BinaryOperator::Create(Instruction::Or, LCast, RCast); } } @@ -8645,7 +8582,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TI0->getType() == CI.getType()) return BinaryOperator::CreateAnd(TI0, - Context->getConstantExprZExt(C, CI.getType())); + ConstantExpr::getZExt(C, CI.getType())); } // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). @@ -8657,9 +8594,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { Value *TI0 = TI->getOperand(0); if (TI0->getType() == CI.getType()) { - Constant *ZC = Context->getConstantExprZExt(C, CI.getType()); - Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); - InsertNewInstBefore(NewAnd, *And); + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); return BinaryOperator::CreateXor(NewAnd, ZC); } } @@ -8674,14 +8610,14 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); // Canonicalize sign-extend from i1 to a select. - if (Src->getType() == Type::Int1Ty) + if (Src->getType() == Type::getInt1Ty(*Context)) return SelectInst::Create(Src, - Context->getConstantIntAllOnesValue(CI.getType()), - Context->getNullValue(CI.getType())); + Constant::getAllOnesValue(CI.getType()), + Constant::getNullValue(CI.getType())); // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. - if (getOpcode(Src) == Instruction::Trunc) { + if (Operator::getOpcode(Src) == Instruction::Trunc) { Value *Op = cast<User>(Src)->getOperand(0); unsigned OpBits = Op->getType()->getScalarSizeInBits(); unsigned MidBits = Src->getType()->getScalarSizeInBits(); @@ -8729,9 +8665,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned MidSize = Src->getType()->getScalarSizeInBits(); unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; - Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt); - I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, - CI.getName()), CI); + Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); + I = Builder->CreateShl(I, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(I, ShAmtV); } } @@ -8742,18 +8677,18 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { /// FitsInFPType - Return a Constant* for the specified FP constant if it fits /// in the specified FP type without changing its value. static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem, - LLVMContext* Context) { + LLVMContext *Context) { bool losesInfo; APFloat F = CFP->getValueAPF(); (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) - return Context->getConstantFP(F); + return ConstantFP::get(*Context, F); return 0; } /// LookThroughFPExtensions - If this is an fp extension instruction, look /// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { +static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0), Context); @@ -8762,12 +8697,12 @@ static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { - if (CFP->getType() == Type::PPC_FP128Ty) + if (CFP->getType() == Type::getPPC_FP128Ty(*Context)) return V; // No constant folding of this. // See if the value can be truncated to float and then reextended. if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context)) return V; - if (CFP->getType() == Type::DoubleTy) + if (CFP->getType() == Type::getDoubleTy(*Context)) return V; // Won't shrink. if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context)) return V; @@ -8804,10 +8739,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // the cast, do this xform. if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, - CI.getType(), CI); - RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, - CI.getType(), CI); + LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); + RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } @@ -8875,10 +8808,11 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // trunc to be exposed to other transforms. Don't do this for extending // ptrtoint's, because we don't know if the target sign or zero extends its // pointers. - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), - TD->getIntPtrType(), - "tmp"), CI); + if (TD && + CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); return new TruncInst(P, CI.getType()); } @@ -8891,65 +8825,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // allows the trunc to be exposed to other transforms. Don't do this for // extending inttoptr's, because we don't know if the target sign or zero // extends to pointers. - if (CI.getOperand(0)->getType()->getScalarSizeInBits() > + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { - Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), - TD->getIntPtrType(), - "tmp"), CI); + Value *P = Builder->CreateTrunc(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); return new IntToPtrInst(P, CI.getType()); } if (Instruction *I = commonCastTransforms(CI)) return I; - - const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType(); - if (!DestPointee->isSized()) return 0; - - // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP. - ConstantInt *Cst; - Value *X; - if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)), - m_ConstantInt(Cst)))) { - // If the source and destination operands have the same type, see if this - // is a single-index GEP. - if (X->getType() == CI.getType()) { - // Get the size of the pointee type. - uint64_t Size = TD->getTypeAllocSize(DestPointee); - - // Convert the constant to intptr type. - APInt Offset = Cst->getValue(); - Offset.sextOrTrunc(TD->getPointerSizeInBits()); - - // If Offset is evenly divisible by Size, we can do this xform. - if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ - Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); - return GetElementPtrInst::Create(X, Context->getConstantInt(Offset)); - } - } - // TODO: Could handle other cases, e.g. where add is indexing into field of - // struct etc. - } else if (CI.getOperand(0)->hasOneUse() && - match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) { - // Otherwise, if this is inttoptr(add x, cst), try to turn this into an - // "inttoptr+GEP" instead of "add+intptr". - - // Get the size of the pointee type. - uint64_t Size = TD->getTypeAllocSize(DestPointee); - - // Convert the constant to intptr type. - APInt Offset = Cst->getValue(); - Offset.sextOrTrunc(TD->getPointerSizeInBits()); - - // If Offset is evenly divisible by Size, we can do this xform. - if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ - Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); - - Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(), - "tmp"), CI); - return GetElementPtrInst::Create(P, - Context->getConstantInt(Offset), "tmp"); - } - } + return 0; } @@ -8960,10 +8845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { const Type *SrcTy = Src->getType(); const Type *DestTy = CI.getType(); - if (SrcTy->isInteger() && DestTy->isInteger()) { - if (Instruction *Result = commonIntCastTransforms(CI)) - return Result; - } else if (isa<PointerType>(SrcTy)) { + if (isa<PointerType>(SrcTy)) { if (Instruction *I = commonPointerCastTransforms(CI)) return I; } else { @@ -8987,8 +8869,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - // If we are casting a malloc or alloca to a pointer to a type of the same + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. + // There is no need to modify malloc calls because it is their bitcast that + // needs to be cleaned up. if (AllocationInst *AI = dyn_cast<AllocationInst>(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; @@ -8996,7 +8880,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. - Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty); + Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context)); unsigned NumZeros = 0; while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && @@ -9008,8 +8892,30 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "", - ((Instruction*) NULL)); + return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "", + ((Instruction*) NULL)); + } + } + + if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (DestVTy->getNumElements() == 1) { + if (!isa<VectorType>(SrcTy)) { + Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + return InsertElementInst::Create(UndefValue::get(DestTy), Elem, + Constant::getNullValue(Type::getInt32Ty(*Context))); + } + // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) + } + } + + if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (SrcVTy->getNumElements() == 1) { + if (!isa<VectorType>(DestTy)) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(*Context))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } } } @@ -9030,10 +8936,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Tmp->getOperand(0)->getType() == DestTy) || ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = InsertCastBefore(Instruction::BitCast, - SVI->getOperand(0), DestTy, CI); - Value *RHS = InsertCastBefore(Instruction::BitCast, - SVI->getOperand(1), DestTy, CI); + Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -9076,9 +8980,9 @@ static unsigned GetSelectFoldableOperands(Instruction *I) { /// GetSelectFoldableConstant - For the same transformation as the previous /// function, return the identity constant that goes into the select. static Constant *GetSelectFoldableConstant(Instruction *I, - LLVMContext* Context) { + LLVMContext *Context) { switch (I->getOpcode()) { - default: assert(0 && "This cannot happen!"); abort(); + default: llvm_unreachable("This cannot happen!"); case Instruction::Add: case Instruction::Sub: case Instruction::Or: @@ -9086,11 +8990,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I, case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - return Context->getNullValue(I->getType()); + return Constant::getNullValue(I->getType()); case Instruction::And: - return Context->getAllOnesValue(I->getType()); + return Constant::getAllOnesValue(I->getType()); case Instruction::Mul: - return Context->getConstantInt(I->getType(), 1); + return ConstantInt::get(I->getType(), 1); } } @@ -9110,7 +9014,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // Fold this by inserting a select from the input values. SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName()+".v"); + FI->getOperand(0), SI.getName()+".v"); InsertNewInstBefore(NewSI, SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); @@ -9160,7 +9064,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, else return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); } - assert(0 && "Shouldn't get here"); + llvm_unreachable("Shouldn't get here"); return 0; } @@ -9202,7 +9106,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, NewSel->takeName(TVI); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - assert(0 && "Unknown instruction!!"); + llvm_unreachable("Unknown instruction!!"); } } } @@ -9231,7 +9135,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, NewSel->takeName(FVI); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - assert(0 && "Unknown instruction!!"); + llvm_unreachable("Unknown instruction!!"); } } } @@ -9266,7 +9170,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) return ReplaceInstUsesWith(SI, FalseVal); // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = SubOne(CI, Context); + Constant *AdjustedRHS = SubOne(CI); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9286,7 +9190,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) return ReplaceInstUsesWith(SI, FalseVal); // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = AddOne(CI, Context); + Constant *AdjustedRHS = AddOne(CI); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9323,10 +9227,10 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { Value *In = ICI->getOperand(0); - Value *Sh = Context->getConstantInt(In->getType(), + Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), + In->getName()+".lobit"), *ICI); if (In->getType() != SI.getType()) In = CastInst::CreateIntegerCast(In, SI.getType(), @@ -9365,6 +9269,14 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return Changed ? &SI : 0; } +/// isDefinedInBB - Return true if the value is an instruction defined in the +/// specified basicblock. +static bool isDefinedInBB(const Value *V, const BasicBlock *BB) { + const Instruction *I = dyn_cast<Instruction>(V); + return I != 0 && I->getParent() == BB; +} + + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -9390,7 +9302,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType() == Type::Int1Ty) { + if (SI.getType() == Type::getInt1Ty(*Context)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -9438,26 +9350,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - - // (x <s 0) ? -1 : 0 -> ashr x, 31 - if (TrueValC->isAllOnesValue() && FalseValC->isZero()) - if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) { - if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) { - // The comparison constant and the result are not neccessarily the - // same width. Make an all-ones value by inserting a AShr. - Value *X = IC->getOperand(0); - uint32_t Bits = X->getType()->getScalarSizeInBits(); - Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1); - Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, - ShAmt, "ones"); - InsertNewInstBefore(SRA, SI); - - // Then cast to the appropriate width. - return CastInst::CreateIntegerCast(SRA, SI.getType(), true); - } - } - - // If one of the constants is zero (we know they can't both be) and we // have an icmp instruction with zero, and we have an 'and' with the // non-constant value, eliminate this whole mess. This corresponds to @@ -9568,10 +9460,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { - NegVal = Context->getConstantExprNeg(C); + NegVal = ConstantExpr::getNeg(C); } else { NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI); + BinaryOperator::CreateNeg(SubOp->getOperand(1), + "tmp"), SI); } Value *NewTrueOp = OtherAddOp; @@ -9595,6 +9488,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return FoldI; } + // See if we can fold the select into a phi node. The true/false values have + // to be live in the predecessor blocks. If they are instructions in SI's + // block, we can't map to the predecessor. + if (isa<PHINode>(SI.getCondition()) && + (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) || + isa<PHINode>(SI.getTrueValue())) && + (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) || + isa<PHINode>(SI.getFalseValue()))) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; + if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal); @@ -9617,7 +9521,7 @@ static unsigned EnforceKnownAlignment(Value *V, User *U = dyn_cast<User>(V); if (!U) return Align; - switch (getOpcode(U)) { + switch (Operator::getOpcode(U)) { default: break; case Instruction::BitCast: return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); @@ -9650,16 +9554,13 @@ static unsigned EnforceKnownAlignment(Value *V, Align = PrefAlign; } } - } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) { - // If there is a requested alignment and if this is an alloca, round up. We - // don't do this for malloc, because some systems can't respect the request. - if (isa<AllocaInst>(AI)) { - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + Align = AI->getAlignment(); + else { + AI->setAlignment(PrefAlign); + Align = PrefAlign; } } @@ -9694,7 +9595,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(MinAlign); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MinAlign, false)); return MI; } @@ -9715,7 +9617,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // Use an integer load+store unless we can find something better. Type *NewPtrTy = - Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3)); + PointerType::getUnqual(IntegerType::get(*Context, Size<<3)); // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast @@ -9725,7 +9627,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // integer datatype. if (Value *Op = getBitCastOperand(MI->getOperand(1))) { const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType(); - if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. while (!SrcETy->isSingleValueType()) { @@ -9744,7 +9646,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } if (SrcETy->isSingleValueType()) - NewPtrTy = Context->getPointerTypeUnqual(SrcETy); + NewPtrTy = PointerType::getUnqual(SrcETy); } } @@ -9754,28 +9656,29 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI); - Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI); + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); InsertNewInstBefore(L, *MI); InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Context->getNullValue(MemOpLength->getType())); + MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); return MI; } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); if (MI->getAlignment() < Alignment) { - MI->setAlignment(Alignment); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + Alignment, false)); return MI; } // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || FillC->getType() != Type::Int8Ty) + if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -9785,21 +9688,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = Context->getIntegerType(Len*8); // n=1 -> i8. + const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); - Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI); + Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill), + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false, Alignment), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setLength(Context->getNullValue(LenC->getType())); + MI->setLength(Constant::getNullValue(LenC->getType())); return MI; } @@ -9820,8 +9723,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return &CI; } - - IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); @@ -9891,9 +9792,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = InsertBitCastBefore(II->getOperand(1), - Context->getPointerTypeUnqual(II->getType()), - CI); + Value *Ptr = Builder->CreateBitCast(II->getOperand(1), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } break; @@ -9902,8 +9802,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn stvx -> store if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { const Type *OpPtrTy = - Context->getPointerTypeUnqual(II->getOperand(1)->getType()); - Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI); + PointerType::getUnqual(II->getOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); return new StoreInst(II->getOperand(1), Ptr); } break; @@ -9913,8 +9813,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn X86 storeu -> store if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { const Type *OpPtrTy = - Context->getPointerTypeUnqual(II->getOperand(2)->getType()); - Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI); + PointerType::getUnqual(II->getOperand(2)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); return new StoreInst(II->getOperand(2), Ptr); } break; @@ -9951,9 +9851,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI); - Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI); - Value *Result = Context->getUndef(Op0->getType()); + Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); + Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. Value *ExtractedElts[32]; @@ -9966,16 +9866,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Idx &= 31; // Match the hardware behavior. if (ExtractedElts[Idx] == 0) { - Instruction *Elt = - new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp"); - InsertNewInstBefore(Elt, CI); - ExtractedElts[Idx] = Elt; + ExtractedElts[Idx] = + Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false), + "tmp"); } // Insert this value into the result vector. - Result = InsertElementInst::Create(Result, ExtractedElts[Idx], - i, "tmp"); - InsertNewInstBefore(cast<Instruction>(Result), CI); + Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], + ConstantInt::get(Type::getInt32Ty(*Context), i, false), + "tmp"); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -9999,7 +9899,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI)) { + if (isa<AllocaInst>(BI) || isMalloc(BI)) { CannotRemove = true; break; } @@ -10055,7 +9955,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; - if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) + if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) return false; return true; } @@ -10076,11 +9976,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Instruction *OldCall = CS.getInstruction(); // If the call and callee calling conventions don't match, this call must // be unreachable, as the call is undefined. - new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), OldCall); - if (!OldCall->use_empty()) - OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType())); + // If OldCall dues not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!OldCall->getType()->isVoidTy()) + OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); if (isa<CallInst>(OldCall)) // Not worth removing an invoke here. return EraseInstFromFunction(*OldCall); return 0; @@ -10090,18 +9992,20 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // This instruction is not reachable, just remove it. We insert a store to // undef so that we know that this code is not reachable, despite the fact // that we can't modify the CFG here. - new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), CS.getInstruction()); - if (!CS.getInstruction()->use_empty()) + // If CS dues not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!CS.getInstruction()->getType()->isVoidTy()) CS.getInstruction()-> - replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType())); + replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { // Don't break the CFG, insert a dummy cond branch. BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - Context->getConstantIntTrue(), II); + ConstantInt::getTrue(*Context), II); } return EraseInstFromFunction(*CS.getInstruction()); } @@ -10165,13 +10069,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. - !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) && - (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType()))) + !((isa<PointerType>(OldRetTy) || !TD || + OldRetTy == TD->getIntPtrType(Caller->getContext())) && + (isa<PointerType>(NewRetTy) || !TD || + NewRetTy == TD->getIntPtrType(Caller->getContext())))) return false; // Cannot transform this return value. if (!Caller->use_empty() && // void -> non-void is handled specially - NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy)) + !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { @@ -10212,8 +10118,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. bool isConvertible = ActTy == ParamTy || - ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) && - (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType())); + (TD && ((isa<PointerType>(ParamTy) || + ParamTy == TD->getIntPtrType(Caller->getContext())) && + (isa<PointerType>(ActTy) || + ActTy == TD->getIntPtrType(Caller->getContext())))); if (Callee->isDeclaration() && !isConvertible) return false; } @@ -10260,8 +10168,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } else { Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, ParamTy, false); - CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp"); - Args.push_back(InsertNewInstBefore(NewCast, *Caller)); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); } // Add any parameter attributes. @@ -10270,26 +10177,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // If the function takes more arguments than the call was taking, add them - // now... + // now. for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) - Args.push_back(Context->getNullValue(FT->getParamType(i))); + Args.push_back(Constant::getNullValue(FT->getParamType(i))); - // If we are removing arguments to the function, emit an obnoxious warning... + // If we are removing arguments to the function, emit an obnoxious warning. if (FT->getNumParams() < NumActualArgs) { if (!FT->isVarArg()) { - cerr << "WARNING: While resolving call to function '" - << Callee->getName() << "' arguments were dropped!\n"; + errs() << "WARNING: While resolving call to function '" + << Callee->getName() << "' arguments were dropped!\n"; } else { - // Add all of the arguments in their promoted form to the arg list... + // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { const Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! - Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, - PTy, false); - Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp"); - InsertNewInstBefore(Cast, *Caller); - Args.push_back(Cast); + Instruction::CastOps opcode = + CastInst::getCastOpcode(*AI, false, PTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); } else { Args.push_back(*AI); } @@ -10304,10 +10209,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Attributes FnAttrs = CallerPAL.getFnAttributes()) attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); - if (NewRetTy == Type::VoidTy) + if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end()); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), + attrVec.end()); Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -10329,7 +10235,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Insert a cast of the return type as necessary. Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { - if (NV->getType() != Type::VoidTy) { + if (!NV->getType()->isVoidTy()) { Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, OldRetTy, false); NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); @@ -10343,16 +10249,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Otherwise, it's a call, just insert cast right after the call instr InsertNewInstBefore(NC, *Caller); } - AddUsersToWorkList(*Caller); + Worklist.AddUsersToWorkList(*Caller); } else { - NV = Context->getUndef(Caller->getType()); + NV = UndefValue::get(Caller->getType()); } } - if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + + if (!Caller->use_empty()) Caller->replaceAllUsesWith(NV); - Caller->eraseFromParent(); - RemoveFromWorkList(Caller); + + EraseInstFromFunction(*Caller); return true; } @@ -10469,14 +10376,14 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. - FunctionType *NewFTy = - Context->getFunctionType(FTy->getReturnType(), NewTypes, + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); Constant *NewCallee = - NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ? - NestF : Context->getConstantExprBitCast(NestF, - Context->getPointerTypeUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end()); + NestF->getType() == PointerType::getUnqual(NewFTy) ? + NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), + NewAttrs.end()); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -10495,10 +10402,10 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { setCallingConv(cast<CallInst>(Caller)->getCallingConv()); cast<CallInst>(NewCaller)->setAttributes(NewPAL); } - if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + if (!Caller->getType()->isVoidTy()) Caller->replaceAllUsesWith(NewCaller); Caller->eraseFromParent(); - RemoveFromWorkList(Caller); + Worklist.Remove(Caller); return 0; } } @@ -10508,13 +10415,13 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // code sort out any function type mismatches. Constant *NewCallee = NestF->getType() == PTy ? NestF : - Context->getConstantExprBitCast(NestF, PTy); + ConstantExpr::getBitCast(NestF, PTy); CS.setCalledFunction(NewCallee); return CS.getInstruction(); } -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)] -/// and if a/b/c/d and the add's all have a single use, turn this into two phi's +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi /// and a single binop. Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); @@ -10526,8 +10433,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { const Type *LHSType = LHSVal->getType(); const Type *RHSType = RHSVal->getType(); - // Scan to see if all operands are the same opcode, all have one use, and all - // kill their operands (i.e. the operands have one use). + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); if (!I || I->getOpcode() != Opc || !I->hasOneUse() || @@ -10547,6 +10453,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; } + + // If both LHS and RHS would need a PHI, don't do this transformation, + // because it would increase the number of PHIs entering the block, + // which leads to higher register pressure. This is especially + // bad when the PHIs are in the header of a loop. + if (!LHSVal && !RHSVal) + return 0; // Otherwise, this is safe to transform! @@ -10589,8 +10502,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, - RHSVal); + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); } Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -10601,9 +10514,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // This is true if all GEP bases are allocas and if all indices into them are // constants. bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi, which leads to higher register pressure. This is + // especially bad when the PHIs are in the header of a loop. + bool NeededPhi = false; - // Scan to see if all operands are the same opcode, all have one use, and all - // kill their operands (i.e. the operands have one use). + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || @@ -10632,7 +10549,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; } } @@ -10678,8 +10604,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { } Value *Base = FixedOperands[0]; - return GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + return cast<GEPOperator>(FirstInst)->isInBounds() ? + GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, + FixedOperands.end()) : + GetElementPtrInst::Create(Base, FixedOperands.begin()+1, + FixedOperands.end()); } @@ -10836,7 +10765,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); assert(isa<LoadInst>(FirstInst) && "Unknown operation"); @@ -10929,7 +10858,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); if (DeadPHICycle(PU, PotentiallyDeadPHIs)) - return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } // If this phi has a single use, and if that use just computes a value for @@ -10941,7 +10870,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (PHIUser->hasOneUse() && (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && PHIUser->use_back() == &PN) { - return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } } @@ -10982,30 +10911,14 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { return 0; } -static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, - Instruction *InsertPoint, - InstCombiner *IC) { - unsigned PtrSize = DTy->getScalarSizeInBits(); - unsigned VTySize = V->getType()->getScalarSizeInBits(); - // We must cast correctly to the pointer type. Ensure that we - // sign extend the integer value if it is smaller as this is - // used for address computation. - Instruction::CastOps opcode = - (VTySize < PtrSize ? Instruction::SExt : - (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc)); - return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint); -} - - Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); - // Is it 'getelementptr %P, i32 0' or 'getelementptr %P' - // If so, eliminate the noop. + // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops. if (GEP.getNumOperands() == 1) return ReplaceInstUsesWith(GEP, PtrOp); if (isa<UndefValue>(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType())); + return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); bool HasZeroPointerIndex = false; if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1))) @@ -11015,78 +10928,48 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return ReplaceInstUsesWith(GEP, PtrOp); // Eliminate unneeded casts for indices. - bool MadeChange = false; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end(); - i != e; ++i, ++GTI) { - if (isa<SequentialType>(*GTI)) { - if (CastInst *CI = dyn_cast<CastInst>(*i)) { - if (CI->getOpcode() == Instruction::ZExt || - CI->getOpcode() == Instruction::SExt) { - const Type *SrcTy = CI->getOperand(0)->getType(); - // We can eliminate a cast from i32 to i64 iff the target - // is a 32-bit pointer target. - if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) { - MadeChange = true; - *i = CI->getOperand(0); - } - } - } + if (TD) { + bool MadeChange = false; + unsigned PtrSize = TD->getPointerSizeInBits(); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); + I != E; ++I, ++GTI) { + if (!isa<SequentialType>(*GTI)) continue; + // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. - // If the incoming value needs a cast instruction, - // insert it. This explicit cast can make subsequent optimizations more - // obvious. - Value *Op = *i; - if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) { - if (Constant *C = dyn_cast<Constant>(Op)) { - *i = Context->getConstantExprTrunc(C, TD->getIntPtrType()); - MadeChange = true; - } else { - Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(), - GEP); - *i = Op; - MadeChange = true; - } - } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) { - if (Constant *C = dyn_cast<Constant>(Op)) { - *i = Context->getConstantExprSExt(C, TD->getIntPtrType()); - MadeChange = true; - } else { - Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(), - GEP); - *i = Op; - MadeChange = true; - } - } + // to what we need. If narrower, sign-extend it to what we need. This + // explicit cast can make subsequent optimizations more obvious. + unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); + if (OpBits == PtrSize) + continue; + + *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); + MadeChange = true; } + if (MadeChange) return &GEP; } - if (MadeChange) return &GEP; // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. // - SmallVector<Value*, 8> SrcGEPOperands; - if (User *Src = dyn_castGetElementPtr(PtrOp)) - SrcGEPOperands.append(Src->op_begin(), Src->op_end()); - - if (!SrcGEPOperands.empty()) { + if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. // - if (isa<GetElementPtrInst>(SrcGEPOperands[0]) && - cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2) - return 0; // Wait until our source is folded to completion. + if (GetElementPtrInst *SrcGEP = + dyn_cast<GetElementPtrInst>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2) + return 0; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; - for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)), - E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I) + for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); + I != E; ++I) EndsWithSequential = !isa<StructType>(*I); // Can we combine the two pointer arithmetics offsets? @@ -11094,98 +10977,68 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... // - Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1); - if (SO1 == Context->getNullValue(SO1->getType())) { + Value *Sum; + Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *GO1 = GEP.getOperand(1); + if (SO1 == Constant::getNullValue(SO1->getType())) { Sum = GO1; - } else if (GO1 == Context->getNullValue(GO1->getType())) { + } else if (GO1 == Constant::getNullValue(GO1->getType())) { Sum = SO1; } else { - // If they aren't the same type, convert both to an integer of the - // target's pointer size. - if (SO1->getType() != GO1->getType()) { - if (Constant *SO1C = dyn_cast<Constant>(SO1)) { - SO1 = - Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true); - } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) { - GO1 = - Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true); - } else { - unsigned PS = TD->getPointerSizeInBits(); - if (TD->getTypeSizeInBits(SO1->getType()) == PS) { - // Convert GO1 to SO1's type. - GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this); - - } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) { - // Convert SO1 to GO1's type. - SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this); - } else { - const Type *PT = TD->getIntPtrType(); - SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this); - GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this); - } - } - } - if (isa<Constant>(SO1) && isa<Constant>(GO1)) - Sum = Context->getConstantExprAdd(cast<Constant>(SO1), - cast<Constant>(GO1)); - else { - Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); - InsertNewInstBefore(cast<Instruction>(Sum), GEP); - } + // If they aren't the same type, then the input hasn't been processed + // by the loop above yet (which canonicalizes sequential index types to + // intptr_t). Just avoid transforming this until the input has been + // normalized. + if (SO1->getType() != GO1->getType()) + return 0; + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } - // Recycle the GEP we already have if possible. - if (SrcGEPOperands.size() == 2) { - GEP.setOperand(0, SrcGEPOperands[0]); + // Update the GEP in place if possible. + if (Src->getNumOperands() == 2) { + GEP.setOperand(0, Src->getOperand(0)); GEP.setOperand(1, Sum); return &GEP; - } else { - Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, - SrcGEPOperands.end()-1); - Indices.push_back(Sum); - Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end()); } + Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.push_back(Sum); + Indices.append(GEP.op_begin()+2, GEP.op_end()); } else if (isa<Constant>(*GEP.idx_begin()) && cast<Constant>(*GEP.idx_begin())->isNullValue() && - SrcGEPOperands.size() != 1) { + Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, - SrcGEPOperands.end()); - Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end()); + Indices.append(Src->op_begin()+1, Src->op_end()); + Indices.append(GEP.idx_begin()+1, GEP.idx_end()); } if (!Indices.empty()) - return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(), - Indices.end(), GEP.getName()); - - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) { - // GEP of global variable. If all of the indices for this GEP are - // constants, we can promote this to a constexpr instead of an instruction. - - // Scan for nonconstants... - SmallVector<Constant*, 8> Indices; - User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); - for (; I != E && isa<Constant>(*I); ++I) - Indices.push_back(cast<Constant>(*I)); - - if (I == E) { // If they are all constants... - Constant *CE = Context->getConstantExprGetElementPtr(GV, - &Indices[0],Indices.size()); - - // Replace all uses of the GEP with the new constexpr... - return ReplaceInstUsesWith(GEP, CE); - } - } else if (Value *X = getBitCastOperand(PtrOp)) { // Is the operand a cast? - if (!isa<PointerType>(X->getType())) { - // Not interesting. Source pointer must be a cast from pointer. - } else if (HasZeroPointerIndex) { - // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... - // into : GEP [10 x i8]* X, i32 0, ... - // - // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... - // into : GEP i8* X, ... - // - // This occurs when the program declares an array extern like "int X[];" + return (cast<GEPOperator>(&GEP)->isInBounds() && + Src->isInBounds()) ? + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), + Indices.end(), GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), + Indices.end(), GEP.getName()); + } + + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). + if (Value *X = getBitCastOperand(PtrOp)) { + assert(isa<PointerType>(X->getType()) && "Must be cast from pointer"); + + // If the input bitcast is actually "bitcast(bitcast(x))", then we don't + // want to change the gep until the bitcasts are eliminated. + if (getBitCastOperand(X)) { + Worklist.AddValue(PtrOp); + return 0; + } + + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... + // into : GEP [10 x i8]* X, i32 0, ... + // + // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... + // into : GEP i8* X, ... + // + // This occurs when the program declares an array extern like "int X[];" + if (HasZeroPointerIndex) { const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); const PointerType *XTy = cast<PointerType>(X->getType()); if (const ArrayType *CATy = @@ -11194,10 +11047,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (CATy->getElementType() == XTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end()); - return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), - GEP.getName()); - } else if (const ArrayType *XATy = - dyn_cast<ArrayType>(XTy->getElementType())) { + return cast<GEPOperator>(&GEP)->isInBounds() ? + GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), + GEP.getName()) : + GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), + GEP.getName()); + } + + if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... @@ -11216,16 +11073,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType(); const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); - if (isa<ArrayType>(SrcElTy) && + if (TD && isa<ArrayType>(SrcElTy) && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; - Idx[0] = Context->getNullValue(Type::Int32Ty); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); Idx[1] = GEP.getOperand(1); - Value *V = InsertNewInstBefore( - GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP); + Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : + Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); // V and GEP are both pointer types --> BitCast - return new BitCastInst(V, GEP.getType()); + return new BitCastInst(NewGEP, GEP.getType()); } // Transform things like: @@ -11233,7 +11091,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) { + if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); @@ -11243,17 +11101,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP.getOperand(1); - Scale = - Context->getConstantInt(cast<IntegerType>(NewIdx->getType()), 1); + Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { - NewIdx = Context->getConstantInt(CI->getType(), 1); + NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = Context->getConstantInt(cast<IntegerType>(Inst->getType()), + Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && @@ -11269,23 +11126,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // operation after making sure Scale doesn't have the sign bit set. if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && Scale->getZExtValue() % ArrayEltSize == 0) { - Scale = Context->getConstantInt(Scale->getType(), + Scale = ConstantInt::get(Scale->getType(), Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { - Constant *C = - Context->getConstantExprIntegerCast(Scale, NewIdx->getType(), + Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); - Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale"); - NewIdx = InsertNewInstBefore(Sc, GEP); + NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); } // Insert the new GEP instruction. Value *Idx[2]; - Idx[0] = Context->getNullValue(Type::Int32Ty); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); Idx[1] = NewIdx; - Instruction *NewGEP = - GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()); - NewGEP = InsertNewInstBefore(NewGEP, GEP); + Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : + Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -11294,12 +11149,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } /// See if we can simplify: - /// X = bitcast A to B* + /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { - if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + if (TD && + !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = @@ -11311,7 +11167,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (Offset == 0) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa<AllocationInst>(BCI->getOperand(0))) { + if (isa<AllocationInst>(BCI->getOperand(0)) || + isMalloc(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -11332,11 +11189,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { const Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { - Instruction *NGEP = - GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); - if (NGEP->getType() == GEP.getType()) return NGEP; - InsertNewInstBefore(NGEP, GEP); + Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), + NewIndices.end()) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), + NewIndices.end()); + + if (NGEP->getType() == GEP.getType()) + return ReplaceInstUsesWith(GEP, NGEP); NGEP->takeName(&GEP); return new BitCastInst(NGEP, GEP.getType()); } @@ -11351,18 +11211,17 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { const Type *NewTy = - Context->getArrayType(AI.getAllocatedType(), C->getZExtValue()); + ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocationInst *New = 0; // Create and insert the replacement instruction... if (isa<MallocInst>(AI)) - New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName()); + New = Builder->CreateMalloc(NewTy, 0, AI.getName()); else { assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName()); + New = Builder->CreateAlloca(NewTy, 0, AI.getName()); } - - InsertNewInstBefore(New, AI); + New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info @@ -11373,27 +11232,27 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Value *NullIdx = Context->getNullValue(Type::Int32Ty); + Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context)); Value *Idx[2]; Idx[0] = NullIdx; Idx[1] = NullIdx; - Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2, - New->getName()+".sub", It); + Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, + New->getName()+".sub", It); // Now make everything use the getelementptr instead of the original // allocation. return ReplaceInstUsesWith(AI, V); } else if (isa<UndefValue>(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); } } - if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { + if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { // If alloca'ing a zero byte object, replace the alloca with a null pointer. // Note that we only do this for alloca's, because malloc should allocate // and return a unique pointer, even for a zero byte allocation. if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) @@ -11409,8 +11268,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Insert a new store to null because we cannot modify the CFG here. - new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI); + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); return EraseInstFromFunction(FI); } @@ -11428,7 +11287,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { // Change free (gep X, 0,0,0,0) into free(X) if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { if (GEPI->hasAllZeroIndices()) { - AddToWorkList(GEPI); + Worklist.Add(GEPI); FI.setOperand(0, GEPI->getOperand(0)); return &FI; } @@ -11440,6 +11299,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { EraseInstFromFunction(FI); return EraseInstFromFunction(*MI); } + if (isMalloc(Op)) { + if (CallInst* CI = extractMallocCallFromBitCast(Op)) { + if (Op->hasOneUse() && CI->hasOneUse()) { + EraseInstFromFunction(FI); + EraseInstFromFunction(*CI); + return EraseInstFromFunction(*cast<Instruction>(Op)); + } + } else { + // Op is a call to malloc + if (Op->hasOneUse()) { + EraseInstFromFunction(FI); + return EraseInstFromFunction(*cast<Instruction>(Op)); + } + } + } return 0; } @@ -11450,7 +11324,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const TargetData *TD) { User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - LLVMContext* Context = IC.getContext(); + LLVMContext *Context = IC.getContext(); if (TD) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { @@ -11479,7 +11353,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - Value *NL = Context->getConstantInt(StrVal); + Value *NL = ConstantInt::get(*Context, StrVal); return IC.ReplaceInstUsesWith(LI, NL); } } @@ -11505,26 +11379,26 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty); - CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2); + Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } - if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || + if (IC.getTargetData() && + (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || isa<VectorType>(SrcPTy)) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && - IC.getTargetData().getTypeSizeInBits(SrcPTy) == - IC.getTargetData().getTypeSizeInBits(DestPTy)) { + IC.getTargetData()->getTypeSizeInBits(SrcPTy) == + IC.getTargetData()->getTypeSizeInBits(DestPTy)) { // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp, - CI->getName(), - LI.isVolatile()),LI); + Value *NewLoad = + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -11537,14 +11411,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); // Attempt to improve the alignment. - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) - LI.setAlignment(KnownAlign); - - // load (cast X) --> cast (load X) iff safe + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + if (KnownAlign > + (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : + LI.getAlignment())) + LI.setAlignment(KnownAlign); + } + + // load (cast X) --> cast (load X) iff safe. if (isa<CastInst>(Op)) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; @@ -11562,29 +11438,28 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<ConstantPointerNull>(GEPI0) && - cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) { + if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ // Insert a new store to null instruction before the load to indicate // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. - new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } if (Constant *C = dyn_cast<Constant>(Op)) { // load null/undef -> undef // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(C) || (C->isNullValue() && - cast<PointerType>(Op->getType())->getAddressSpace() == 0)) { + if (isa<UndefValue>(C) || + (C->isNullValue() && LI.getPointerAddressSpace() == 0)) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. We do this instead of inserting an // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } // Instcombine load (constant global) into the value loaded. @@ -11605,9 +11480,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. - new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } else if (CE->isCast()) { @@ -11622,9 +11497,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){ if (GV->isConstant() && GV->hasDefinitiveInitializer()) { if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType())); + return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); else if (isa<UndefValue>(GV->getInitializer())) - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } @@ -11643,10 +11518,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"), LI); - Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"), LI); + Value *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + Value *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -11674,7 +11549,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - LLVMContext* Context = IC.getContext(); const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); @@ -11696,7 +11570,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // constants. if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { // Index through pointer. - Constant *Zero = Context->getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext())); NewGEPIndices.push_back(Zero); while (1) { @@ -11713,7 +11587,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } } - SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace()); + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) @@ -11721,10 +11595,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. - if (SrcTy->getAddressSpace() != + if (!IC.getTargetData() || + SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace() || - IC.getTargetData().getTypeSizeInBits(SrcPTy) != - IC.getTargetData().getTypeSizeInBits(DestPTy)) + IC.getTargetData()->getTypeSizeInBits(SrcPTy) != + IC.getTargetData()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of @@ -11745,22 +11620,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. - if (!NewGEPIndices.empty()) { - if (Constant *C = dyn_cast<Constant>(CastOp)) - CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0], - NewGEPIndices.size()); - else - CastOp = IC.InsertNewInstBefore( - GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()), SI); - } + if (!NewGEPIndices.empty()) + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), + NewGEPIndices.end()); - if (Constant *C = dyn_cast<Constant>(SIOp0)) - NewCast = Context->getConstantExprCast(opcode, C, CastDstTy); - else - NewCast = IC.InsertNewInstBefore( - CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), - SI); + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); return new StoreInst(NewCast, CastOp); } @@ -11777,12 +11642,16 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (A == B) return true; // Test if the values come form identical arithmetic instructions. + // This uses isIdenticalToWhenDefined instead of isIdenticalTo because + // its only used to compare two uses within the same basic block, which + // means that they'll always either have the same value or one of them + // will have an undefined value. if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) || isa<GetElementPtrInst>(A)) if (Instruction *BI = dyn_cast<Instruction>(B)) - if (cast<Instruction>(A)->isIdenticalTo(BI)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; // Otherwise they may not be equivalent. @@ -11854,12 +11723,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } // Attempt to improve the alignment. - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); - if (KnownAlign > - (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : - SI.getAlignment())) - SI.setAlignment(KnownAlign); + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + if (KnownAlign > + (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : + SI.getAlignment())) + SI.setAlignment(KnownAlign); + } // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This @@ -11914,12 +11785,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa<ConstantPointerNull>(Ptr) && - cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { + if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { - SI.setOperand(0, Context->getUndef(Val->getType())); + SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) - AddToWorkList(U); // Dropped a use. + Worklist.Add(U); // Dropped a use. ++NumCombined; } return 0; // Do not modify these! @@ -12096,41 +11966,34 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Cannonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest))) - if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || - FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) { - FCmpInst *I = cast<FCmpInst>(BI.getCondition()); - FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred); - Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I); - NewSCC->takeName(I); - // Swap Destinations and condition... - BI.setCondition(NewSCC); + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || + FPred == FCmpInst::FCMP_OGE) { + FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); + Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); + + // Swap Destinations and condition. BI.setSuccessor(0, FalseDest); BI.setSuccessor(1, TrueDest); - RemoveFromWorkList(I); - I->eraseFromParent(); - AddToWorkList(NewSCC); + Worklist.Add(Cond); return &BI; } // Cannonicalize icmp_ne -> icmp_eq ICmpInst::Predicate IPred; if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest))) - if ((IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || - IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || - IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) { - ICmpInst *I = cast<ICmpInst>(BI.getCondition()); - ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred); - Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I); - NewSCC->takeName(I); - // Swap Destinations and condition... - BI.setCondition(NewSCC); + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || + IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || + IPred == ICmpInst::ICMP_SGE) { + ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); + Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); + // Swap Destinations and condition. BI.setSuccessor(0, FalseDest); BI.setSuccessor(1, TrueDest); - RemoveFromWorkList(I); - I->eraseFromParent();; - AddToWorkList(NewSCC); + Worklist.Add(Cond); return &BI; } @@ -12145,10 +12008,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) SI.setOperand(i, - Context->getConstantExprSub(cast<Constant>(SI.getOperand(i)), + ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), AddRHS)); SI.setOperand(0, I->getOperand(0)); - AddToWorkList(I); + Worklist.Add(I); return &SI; } } @@ -12163,10 +12026,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (Constant *C = dyn_cast<Constant>(Agg)) { if (isa<UndefValue>(C)) - return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType())); + return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); if (isa<ConstantAggregateZero>(C)) - return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType())); + return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { // Extract the element indexed by the first index out of the constant @@ -12214,10 +12077,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = InsertNewInstBefore( - ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()), - EV); + Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), + EV.idx_begin(), EV.idx_end()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), insi, inse); } @@ -12303,17 +12164,17 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo, - LLVMContext* Context) { + LLVMContext *Context) { assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); const VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); if (isa<UndefValue>(V)) - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); else if (isa<ConstantAggregateZero>(V)) - return Context->getNullValue(PTy->getElementType()); + return Constant::getNullValue(PTy->getElementType()); else if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { @@ -12339,7 +12200,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo, else if (InEl < LHSWidth*2) return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context); else - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); } // Otherwise, we don't know. @@ -12349,18 +12210,18 @@ static Value *FindScalarElement(Value *V, unsigned EltNo, Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is undef, replace extract with scalar undef. if (isa<UndefValue>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); // If vector val is constant 0, replace extract with scalar 0. if (isa<ConstantAggregateZero>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType())); + return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with // that element. When the elements are not identical, we cannot replace yet // (we do that below, but only when the index is constant). Constant *op0 = C->getOperand(0); - for (unsigned i = 1; i < C->getNumOperands(); ++i) + for (unsigned i = 1; i != C->getNumOperands(); ++i) if (C->getOperand(i) != op0) { op0 = 0; break; @@ -12373,13 +12234,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { unsigned IndexVal = IdxC->getZExtValue(); - unsigned VectorWidth = - cast<VectorType>(EI.getOperand(0)->getType())->getNumElements(); + unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use @@ -12411,42 +12271,27 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { - if (I->hasOneUse()) { - // Push extractelement into predecessor operation if legal and - // profitable to do so - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - bool isConstantElt = isa<ConstantInt>(EI.getOperand(1)); - if (CheapToScalarize(BO, isConstantElt)) { - ExtractElementInst *newEI0 = - new ExtractElementInst(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); - ExtractElementInst *newEI1 = - new ExtractElementInst(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); - InsertNewInstBefore(newEI0, EI); - InsertNewInstBefore(newEI1, EI); - return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); - } - } else if (isa<LoadInst>(I)) { - unsigned AS = - cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace(); - Value *Ptr = InsertBitCastBefore(I->getOperand(0), - Context->getPointerType(EI.getType(), AS),EI); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep"); - InsertNewInstBefore(GEP, EI); - return new LoadInst(GEP); - } - } - if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { + // Push extractelement into predecessor operation if legal and + // profitable to do so + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (I->hasOneUse() && + CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { + Value *newEI0 = + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); + Value *newEI1 = + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); + return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); + } + } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { // Extracting the inserted element? if (IE->getOperand(2) == EI.getOperand(1)) return ReplaceInstUsesWith(EI, IE->getOperand(1)); // If the inserted and extracted elements are constants, they must not // be the same value, extract from the pre-inserted value instead. - if (isa<Constant>(IE->getOperand(2)) && - isa<Constant>(EI.getOperand(1))) { - AddUsesToWorkList(EI); + if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { + Worklist.AddValue(EI.getOperand(0)); EI.setOperand(0, IE->getOperand(0)); return &EI; } @@ -12465,11 +12310,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); } else { - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); } - return new ExtractElementInst(Src, SrcIdx); + return ExtractElementInst::Create(Src, + ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, + false)); } } + // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } @@ -12479,21 +12327,21 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { /// Otherwise, return false. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, std::vector<Constant*> &Mask, - LLVMContext* Context) { + LLVMContext *Context) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); if (isa<UndefValue>(V)) { - Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context))); return true; } else if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); return true; } else if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts)); return true; } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. @@ -12510,7 +12358,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) { // If so, update the mask to reflect the inserted undef. - Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty); + Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context)); return true; } } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ @@ -12527,11 +12375,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx); + ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts); + ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts); } return true; @@ -12549,17 +12397,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, - Value *&RHS, LLVMContext* Context) { + Value *&RHS, LLVMContext *Context) { assert(isa<VectorType>(V->getType()) && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); if (isa<UndefValue>(V)) { - Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context))); return V; } else if (isa<ConstantAggregateZero>(V)) { - Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0)); + Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0)); return V; } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. @@ -12580,7 +12428,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, RHS = EI->getOperand(0); Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context); Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx); + ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx); return V; } @@ -12590,7 +12438,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, // Everything but the extracted element is replaced with the RHS. for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) - Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i); + Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i); } return V; } @@ -12608,7 +12456,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); return V; } @@ -12635,45 +12483,23 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { return ReplaceInstUsesWith(IE, VecOp); if (InsertedIdx >= NumVectorElts) // Out of range insert. - return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType())); + return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) return ReplaceInstUsesWith(IE, VecOp); - // We could theoretically do this for ANY input. However, doing so could - // turn chains of insertelement instructions into a chain of shufflevector - // instructions, and right now we do not merge shufflevectors. As such, - // only do this in a situation where it is clear that there is benefit. - if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) { - // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of - // the values of VecOp, except then one read from EIOp0. - // Build a new shuffle mask. - std::vector<Constant*> Mask; - if (isa<UndefValue>(VecOp)) - Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty)); - else { - assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing"); - Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty, - NumVectorElts)); - } - Mask[InsertedIdx] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx); - return new ShuffleVectorInst(EI->getOperand(0), VecOp, - Context->getConstantVector(Mask)); - } - // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { std::vector<Constant*> Mask; Value *RHS = 0; Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); - if (RHS == 0) RHS = Context->getUndef(LHS->getType()); + if (RHS == 0) RHS = UndefValue::get(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. return new ShuffleVectorInst(LHS, RHS, - Context->getConstantVector(Mask)); + ConstantVector::get(Mask)); } } } @@ -12697,7 +12523,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // Undefined shuffle mask -> undefined value. if (isa<UndefValue>(SVI.getOperand(2))) - return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType())); + return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); @@ -12724,21 +12550,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector<Constant*> Elts; for (unsigned i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] >= 2*e) - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); else { if ((Mask[i] >= e && isa<UndefValue>(RHS)) || (Mask[i] < e && isa<UndefValue>(LHS))) { Mask[i] = 2*e; // Turn into undef. - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); } else { Mask[i] = Mask[i] % e; // Force to LHS. - Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i])); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i])); } } } SVI.setOperand(0, SVI.getOperand(1)); - SVI.setOperand(1, Context->getUndef(RHS->getType())); - SVI.setOperand(2, Context->getConstantVector(Elts)); + SVI.setOperand(1, UndefValue::get(RHS->getType())); + SVI.setOperand(2, ConstantVector::get(Elts)); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; @@ -12788,14 +12614,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector<Constant*> Elts; for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); } else { - Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i])); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i])); } } return new ShuffleVectorInst(LHSSVI->getOperand(0), LHSSVI->getOperand(1), - Context->getConstantVector(Elts)); + ConstantVector::get(Elts)); } } } @@ -12855,6 +12681,9 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, const TargetData *TD) { SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); + + std::vector<Instruction*> InstrsForInstCombineWorklist; + InstrsForInstCombineWorklist.reserve(128); while (!Worklist.empty()) { BB = Worklist.back(); @@ -12863,44 +12692,28 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, // We have now visited this block! If we've already been here, ignore it. if (!Visited.insert(BB)) continue; - DbgInfoIntrinsic *DBI_Prev = NULL; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *Inst = BBI++; // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst)) { ++NumDeadInst; - DOUT << "IC: DCE: " << *Inst; + DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); continue; } // ConstantProp instruction if trivially constant. - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { - DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; + if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { + DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " + << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; Inst->eraseFromParent(); continue; } - - // If there are two consecutive llvm.dbg.stoppoint calls then - // it is likely that the optimizer deleted code in between these - // two intrinsics. - DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst); - if (DBI_Next) { - if (DBI_Prev - && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint - && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) { - IC.RemoveFromWorkList(DBI_Prev); - DBI_Prev->eraseFromParent(); - } - DBI_Prev = DBI_Next; - } else { - DBI_Prev = 0; - } - IC.AddToWorkList(Inst); + InstrsForInstCombineWorklist.push_back(Inst); } // Recursively visit successors. If this is a branch or switch on a @@ -12932,14 +12745,22 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) Worklist.push_back(TI->getSuccessor(i)); } + + // Once we've found all of the instructions to add to instcombine's worklist, + // add them in reverse order. This way instcombine will visit from the top + // of the function down. This jives well with the way that it adds all uses + // of instructions to the worklist after doing a transformation, thus avoiding + // some N^2 behavior in pathological cases. + IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], + InstrsForInstCombineWorklist.size()); } bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { - bool Changed = false; - TD = &getAnalysis<TargetData>(); + MadeIRChange = false; + TD = getAnalysisIfAvailable<TargetData>(); - DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " - << F.getNameStr() << "\n"); + DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + << F.getNameStr() << "\n"); { // Do a depth-first traversal of the function, populate the worklist with @@ -12957,71 +12778,73 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { while (Term != BB->begin()) { // Remove instrs bottom-up BasicBlock::iterator I = Term; --I; - DOUT << "IC: DCE: " << *I; + DEBUG(errs() << "IC: DCE: " << *I << '\n'); // A debug intrinsic shouldn't force another iteration if we weren't // going to do one without it. if (!isa<DbgInfoIntrinsic>(I)) { ++NumDeadInst; - Changed = true; + MadeIRChange = true; } - if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + + + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I->getType()->isVoidTy()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); } } } - while (!Worklist.empty()) { - Instruction *I = RemoveOneFromWorkList(); + while (!Worklist.isEmpty()) { + Instruction *I = Worklist.RemoveOne(); if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I)) { - // Add operands to the worklist. - if (I->getNumOperands() < 4) - AddUsesToWorkList(*I); + DEBUG(errs() << "IC: DCE: " << *I << '\n'); + EraseInstFromFunction(*I); ++NumDeadInst; - - DOUT << "IC: DCE: " << *I; - - I->eraseFromParent(); - RemoveFromWorkList(I); - Changed = true; + MadeIRChange = true; continue; } // Instruction isn't dead, see if we can constant propagate it. - if (Constant *C = ConstantFoldInstruction(I, TD)) { - DOUT << "IC: ConstFold to: " << *C << " from: " << *I; + if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) { + DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. - AddUsesToWorkList(*I); ReplaceInstUsesWith(*I, C); - ++NumConstProp; - I->eraseFromParent(); - RemoveFromWorkList(I); - Changed = true; + EraseInstFromFunction(*I); + MadeIRChange = true; continue; } - if (TD && - (I->getType()->getTypeID() == Type::VoidTyID || - I->isTrapping())) { + if (TD) { // See if we can constant fold its operands. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) - if (Constant *NewC = ConstantFoldConstantExpression(CE, TD)) + if (Constant *NewC = ConstantFoldConstantExpression(CE, + F.getContext(), TD)) if (NewC != CE) { - i->set(NewC); - Changed = true; + *i = NewC; + MadeIRChange = true; } } // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { BasicBlock *BB = I->getParent(); - BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent(); + Instruction *UserInst = cast<Instruction>(I->use_back()); + BasicBlock *UserParent; + + // Get the block the use occurs in. + if (PHINode *PN = dyn_cast<PHINode>(UserInst)) + UserParent = PN->getIncomingBlock(I->use_begin().getUse()); + else + UserParent = UserInst->getParent(); + if (UserParent != BB) { bool UserIsSuccessor = false; // See if the user is one of our successors. @@ -13034,31 +12857,34 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the user is one of our immediate successors, and if that successor // only has us as a predecessors (we'd have to split the critical edge // otherwise), we can keep going. - if (UserIsSuccessor && !isa<PHINode>(I->use_back()) && - next(pred_begin(UserParent)) == pred_end(UserParent)) + if (UserIsSuccessor && UserParent->getSinglePredecessor()) // Okay, the CFG is simple enough, try to sink this instruction. - Changed |= TryToSinkInstruction(I, UserParent); + MadeIRChange |= TryToSinkInstruction(I, UserParent); } } - // Now that we have an instruction, try combining it to simplify it... + // Now that we have an instruction, try combining it to simplify it. + Builder->SetInsertPoint(I->getParent(), I); + #ifndef NDEBUG std::string OrigI; #endif - DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str();); + DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); + DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { - DOUT << "IC: Old = " << *I - << " New = " << *Result; + DEBUG(errs() << "IC: Old = " << *I << '\n' + << " New = " << *Result << '\n'); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); // Push the new instruction and any users onto the worklist. - AddToWorkList(Result); - AddUsersToWorkList(*Result); + Worklist.Add(Result); + Worklist.AddUsersToWorkList(*Result); // Move the name to the new instruction first. Result->takeName(I); @@ -13073,52 +12899,42 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { InstParent->getInstList().insert(InsertPos, Result); - // Make sure that we reprocess all operands now that we reduced their - // use counts. - AddUsesToWorkList(*I); - - // Instructions can end up on the worklist more than once. Make sure - // we do not process an instruction that has been deleted. - RemoveFromWorkList(I); - - // Erase the old instruction. - InstParent->getInstList().erase(I); + EraseInstFromFunction(*I); } else { #ifndef NDEBUG - DOUT << "IC: Mod = " << OrigI - << " New = " << *I; + DEBUG(errs() << "IC: Mod = " << OrigI << '\n' + << " New = " << *I << '\n'); #endif // If the instruction was modified, it's possible that it is now dead. // if so, remove it. if (isInstructionTriviallyDead(I)) { - // Make sure we process all operands now that we are reducing their - // use counts. - AddUsesToWorkList(*I); - - // Instructions may end up in the worklist more than once. Erase all - // occurrences of this instruction. - RemoveFromWorkList(I); - I->eraseFromParent(); + EraseInstFromFunction(*I); } else { - AddToWorkList(I); - AddUsersToWorkList(*I); + Worklist.Add(I); + Worklist.AddUsersToWorkList(*I); } } - Changed = true; + MadeIRChange = true; } } - assert(WorklistMap.empty() && "Worklist empty, but map not?"); - - // Do an explicit clear, this shrinks the map if needed. - WorklistMap.clear(); - return Changed; + Worklist.Zap(); + return MadeIRChange; } bool InstCombiner::runOnFunction(Function &F) { MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + Context = &F.getContext(); + + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + IRBuilder<true, ConstantFolder, InstCombineIRInserter> + TheBuilder(F.getContext(), ConstantFolder(F.getContext()), + InstCombineIRInserter(Worklist)); + Builder = &TheBuilder; bool EverMadeChange = false; @@ -13126,6 +12942,8 @@ bool InstCombiner::runOnFunction(Function &F) { unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) EverMadeChange = true; + + Builder = 0; return EverMadeChange; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dee7bfb..8b11edd 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" @@ -26,13 +27,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); +STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); static cl::opt<unsigned> Threshold("jump-threading-threshold", @@ -56,7 +57,7 @@ namespace { /// In this case, the unconditional branch at the end of the first if can be /// revectored to the false side of the second if. /// - class VISIBILITY_HIDDEN JumpThreading : public FunctionPass { + class JumpThreading : public FunctionPass { TargetData *TD; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -68,15 +69,16 @@ namespace { JumpThreading() : FunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } bool runOnFunction(Function &F); void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); - bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB, - unsigned JumpThreadCost); + bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); + bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + BasicBlock *PredBB); + BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); @@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } /// runOnFunction - Top level algorithm. /// bool JumpThreading::runOnFunction(Function &F) { - DOUT << "Jump threading on function '" << F.getNameStart() << "'\n"; - TD = &getAnalysis<TargetData>(); + DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); + TD = getAnalysisIfAvailable<TargetData>(); FindLoopHeaders(F); @@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) { // edges which simplifies the CFG. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { - DOUT << " JT: Deleting dead block '" << BB->getNameStart() - << "' with terminator: " << *BB->getTerminator(); + DEBUG(errs() << " JT: Deleting dead block '" << BB->getName() + << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); DeleteDeadBlock(BB); Changed = true; @@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) { return EverChanged; } +/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to +/// thread across it. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { + /// Ignore PHI nodes, these will be flattened when duplication happens. + BasicBlock::const_iterator I = BB->getFirstNonPHI(); + + // Sum up the cost of each instruction until we get to the terminator. Don't + // include the terminator because the copy won't include it. + unsigned Size = 0; + for (; !isa<TerminatorInst>(I); ++I) { + // Debugger intrinsics don't incur code size. + if (isa<DbgInfoIntrinsic>(I)) continue; + + // If this is a pointer->pointer bitcast, it is free. + if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) + continue; + + // All other instructions count for at least one unit. + ++Size; + + // Calls are more expensive. If they are non-intrinsic calls, we model them + // as having cost of 4. If they are a non-vector intrinsic, we model them + // as having cost of 2 total, and if they are a vector intrinsic, we model + // them as having cost 1. + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + if (!isa<IntrinsicInst>(CI)) + Size += 3; + else if (!isa<VectorType>(CI->getType())) + Size += 1; + } + } + + // Threading through a switch statement is particularly profitable. If this + // block ends in a switch, decrease its cost to make it more likely to happen. + if (isa<SwitchInst>(I)) + Size = Size > 6 ? Size-6 : 0; + + return Size; +} + + + /// FindLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from @@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { if (CommonPreds.size() == 1) return CommonPreds[0]; - DOUT << " Factoring out " << CommonPreds.size() - << " common predecessors.\n"; + DEBUG(errs() << " Factoring out " << CommonPreds.size() + << " common predecessors.\n"); return SplitBlockPredecessors(PN->getParent(), &CommonPreds[0], CommonPreds.size(), ".thr_comm", this); } -/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to -/// thread across it. -static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { - /// Ignore PHI nodes, these will be flattened when duplication happens. - BasicBlock::const_iterator I = BB->getFirstNonPHI(); - - // Sum up the cost of each instruction until we get to the terminator. Don't - // include the terminator because the copy won't include it. - unsigned Size = 0; - for (; !isa<TerminatorInst>(I); ++I) { - // Debugger intrinsics don't incur code size. - if (isa<DbgInfoIntrinsic>(I)) continue; - - // If this is a pointer->pointer bitcast, it is free. - if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) - continue; - - // All other instructions count for at least one unit. - ++Size; - - // Calls are more expensive. If they are non-intrinsic calls, we model them - // as having cost of 4. If they are a non-vector intrinsic, we model them - // as having cost of 2 total, and if they are a vector intrinsic, we model - // them as having cost 1. - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<IntrinsicInst>(CI)) - Size += 3; - else if (!isa<VectorType>(CI->getType())) - Size += 1; - } +/// GetBestDestForBranchOnUndef - If we determine that the specified block ends +/// in an undefined jump, decide which block is best to revector to. +/// +/// Since we can pick an arbitrary destination, we pick the successor with the +/// fewest predecessors. This should reduce the in-degree of the others. +/// +static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { + TerminatorInst *BBTerm = BB->getTerminator(); + unsigned MinSucc = 0; + BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); + // Compute the successor with the minimum number of predecessors. + unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { + TestBB = BBTerm->getSuccessor(i); + unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + if (NumPreds < MinNumPreds) + MinSucc = i; } - // Threading through a switch statement is particularly profitable. If this - // block ends in a switch, decrease its cost to make it more likely to happen. - if (isa<SwitchInst>(I)) - Size = Size > 6 ? Size-6 : 0; - - return Size; + return MinSucc; } /// ProcessBlock - If there are any predecessors whose control can be threaded @@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // terminator to an unconditional branch. This can occur due to threading in // other blocks. if (isa<ConstantInt>(Condition)) { - DOUT << " In block '" << BB->getNameStart() - << "' folding terminator: " << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(BB); return true; } // If the terminator is branching on an undef, we can pick any of the - // successors to branch to. Since this is arbitrary, we pick the successor - // with the fewest predecessors. This should reduce the in-degree of the - // others. + // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { - TerminatorInst *BBTerm = BB->getTerminator(); - unsigned MinSucc = 0; - BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); - // Compute the successor with the minimum number of predecessors. - unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { - TestBB = BBTerm->getSuccessor(i); - unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) - MinSucc = i; - } + unsigned BestSucc = GetBestDestForJumpOnUndef(BB); // Fold the branch/switch. + TerminatorInst *BBTerm = BB->getTerminator(); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { - if (i == MinSucc) continue; + if (i == BestSucc) continue; BBTerm->getSuccessor(i)->removePredecessor(BB); } - DOUT << " In block '" << BB->getNameStart() - << "' folding undef terminator: " << *BBTerm; - BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding undef terminator: " << *BBTerm << '\n'); + BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); return true; } @@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, else if (PredBI->getSuccessor(0) != BB) BranchDir = false; else { - DOUT << " In block '" << PredBB->getNameStart() - << "' folding terminator: " << *PredBB->getTerminator(); + DEBUG(errs() << " In block '" << PredBB->getName() + << "' folding terminator: " << *PredBB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(PredBB); return true; @@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // If the dest block has one predecessor, just fix the branch condition to a // constant and fold it. if (BB->getSinglePredecessor()) { - DOUT << " In block '" << BB->getNameStart() - << "' folding condition to '" << BranchDir << "': " - << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding condition to '" << BranchDir << "': " + << *BB->getTerminator() << '\n'); ++NumFolds; - DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir)); + Value *OldCond = DestBI->getCondition(); + DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BranchDir)); ConstantFoldTerminator(BB); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); return true; } - - // Otherwise we need to thread from PredBB to DestBB's successor which - // involves code duplication. Check to see if it is worth it. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } + // Next, figure out which successor we are threading to. BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredBB == DestBB) return false; - SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator()); SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator()); @@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, // Otherwise, we're safe to make the change. Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. - DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI; - DOUT << "THROUGH: " << *DestSI; + DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); + DEBUG(errs() << "THROUGH: " << *DestSI); // If the destination has PHI nodes, just split the edge for updating // simplicity. @@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. - if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType()); + if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(AvailableVal); LI->eraseFromParent(); return true; @@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { } -/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in /// the current block. See if there are any simplifications we can do based on /// inputs to the phi node. /// bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { - // See if the phi node has any constant values. If so, we can determine where - // the corresponding predecessor will branch. - ConstantInt *PredCst = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) - break; - - // If no incoming value has a constant, we don't know the destination of any - // predecessors. - if (PredCst == 0) - return false; - - // See if the cost of duplicating this block is low enough. BasicBlock *BB = PN->getParent(); - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; + + // See if the phi node has any constant integer or undef values. If so, we + // can determine where the corresponding predecessor will branch. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PredVal = PN->getIncomingValue(i); + + // Check to see if this input is a constant integer. If so, the direction + // of the branch is predictable. + if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); + + BasicBlock *SuccBB; + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + SuccBB = BI->getSuccessor(CI->isZero()); + else { + SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); + SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); + } + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } + + // If the input is an undef, then it doesn't matter which way it will go. + // Pick an arbitrary dest and thread the edge. + if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); + BasicBlock *SuccBB = + BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } } - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + // If the incoming values are all variables, we don't know the destination of + // any predecessors. However, if any of the predecessor blocks end in an + // unconditional branch, we can *duplicate* the jump into that block in order + // to further encourage jump threading and to eliminate cases where we have + // branch on a phi of an icmp (branch on icmp is much better). + + // We don't want to do this tranformation for switches, because we don't + // really want to duplicate a switch. + if (isa<SwitchInst>(BB->getTerminator())) + return false; - // Next, figure out which successor we are threading to. - BasicBlock *SuccBB; - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse()); - else { - SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); - SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); + // Look for unconditional branch predecessors. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) + if (PredBr->isUnconditional() && + // Try to duplicate BB into PredBB. + DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) + return true; } - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + + return false; } + /// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch /// whose condition is an AND/OR where one side is PN. If PN has constant /// operands that permit us to evaluate the condition for some operand, thread @@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, // We can only do the simplification for phi nodes of 'false' with AND or // 'true' with OR. See if we have any entries in the phi for this. unsigned PredNo = ~0U; - ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd); + ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), + !isAnd); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) == PredCst) { PredNo = i; @@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, if (PredNo == ~0U) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); @@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right @@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, /// result can not be determined, a null pointer is returned. static Constant *GetResultOfComparison(CmpInst::Predicate pred, Value *LHS, Value *RHS, - LLVMContext* Context) { + LLVMContext &Context) { if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return Context->getConstantExprCompare(pred, CLHS, CRHS); + return ConstantExpr::getCompare(pred, CLHS, CRHS); if (LHS == RHS) if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) return ICmpInst::isTrueWhenEqual(pred) ? - Context->getConstantIntTrue() : Context->getConstantIntFalse(); + ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context); return 0; } @@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { PredVal = PN->getIncomingValue(i); Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, - RHS, Context); + RHS, Cmp->getContext()); if (!Res) { PredVal = 0; continue; @@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { if (PredVal == 0) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); @@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } +/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// predecessor to the PHIBB block. If it has PHI nodes, add entries for +/// NewPred using the entries from OldPred (suitably mapped). +static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, + BasicBlock *OldPred, + BasicBlock *NewPred, + DenseMap<Instruction*, Value*> &ValueMap) { + for (BasicBlock::iterator PNI = PHIBB->begin(); + PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) { + // Ok, we have a PHI node. Figure out what the incoming value was for the + // DestBlock. + Value *IV = PN->getIncomingValueForBlock(OldPred); + + // Remap the value if necessary. + if (Instruction *Inst = dyn_cast<Instruction>(IV)) { + DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst); + if (I != ValueMap.end()) + IV = I->second; + } + + PN->addIncoming(IV, NewPred); + } +} + /// ThreadEdge - We have decided that it is safe and profitable to thread an /// edge from PredBB to SuccBB across BB. Transform the IR to reflect this /// change. bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, - BasicBlock *SuccBB, unsigned JumpThreadCost) { - + BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { - DOUT << " Not threading across BB '" << BB->getNameStart() - << "' - would thread to self!\n"; + DEBUG(errs() << " Not threading across BB '" << BB->getName() + << "' - would thread to self!\n"); return false; } // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DOUT << " Not threading from '" << PredBB->getNameStart() - << "' across loop header BB '" << BB->getNameStart() - << "' to dest BB '" << SuccBB->getNameStart() - << "' - it might create an irreducible loop!\n"; + DEBUG(errs() << " Not threading from '" << PredBB->getName() + << "' across loop header BB '" << BB->getName() + << "' to dest BB '" << SuccBB->getName() + << "' - it might create an irreducible loop!\n"); return false; } - // And finally, do it! - DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" - << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost - << ", across block:\n " - << *BB << "\n"; - - // Jump Threading can not update SSA properties correctly if the values - // defined in the duplicated block are used outside of the block itself. For - // this reason, we spill all values that are used outside of BB to the stack. - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - if (!I->isUsedOutsideOfBlock(BB)) - continue; - - // We found a use of I outside of BB. Create a new stack slot to - // break this inter-block usage pattern. - DemoteRegToStack(*I); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DEBUG(errs() << " Not threading BB '" << BB->getName() + << "' - Cost is too high: " << JumpThreadCost << "\n"); + return false; } - + + // And finally, do it! + DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" + << SuccBB->getName() << "' with cost: " << JumpThreadCost + << ", across block:\n " + << *BB << "\n"); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. DenseMap<Instruction*, Value*> ValueMapping; - BasicBlock *NewBB = - BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BB->getName()+".thread", + BB->getParent(), BB); NewBB->moveAfter(PredBB); BasicBlock::iterator BI = BB->begin(); @@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // mapping and using it to remap operands in the cloned instructions. for (; !isa<TerminatorInst>(BI); ++BI) { Instruction *New = BI->clone(); - New->setName(BI->getNameStart()); + New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[BI] = New; @@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. - for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) { - PHINode *PN = cast<PHINode>(PNI); - // Ok, we have a PHI node. Figure out what the incoming value was for the - // DestBlock. - Value *IV = PN->getIncomingValueForBlock(BB); - - // Remap the value if necessary. - if (Instruction *Inst = dyn_cast<Instruction>(IV)) { - DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); - if (I != ValueMapping.end()) - IV = I->second; + AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); } - PN->addIncoming(IV, NewBB); + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); } + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to // NewBB instead of BB. This eliminates predecessors from BB, which requires // us to simplify any PHI nodes in BB. @@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BI = NewBB->begin(); for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); continue; @@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, ++NumThreads; return true; } + +/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// to BB which contains an i1 PHI node and a conditional branch on that PHI. +/// If we can duplicate the contents of BB up into PredBB do so now, this +/// improves the odds that the branch will be on an analyzable instruction like +/// a compare. +bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + BasicBlock *PredBB) { + // If BB is a loop header, then duplicating this block outside the loop would + // cause us to transform this into an irreducible loop, don't do this. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DEBUG(errs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBB->getName() + << "' - it might create an irreducible loop!\n"); + return false; + } + + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + if (DuplicationCost > Threshold) { + DEBUG(errs() << " Not duplicating BB '" << BB->getName() + << "' - Cost is too high: " << DuplicationCost << "\n"); + return false; + } + + // Okay, we decided to do this! Clone all the instructions in BB onto the end + // of PredBB. + DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '" + << PredBB->getName() << "' to eliminate branch on phi. Cost: " + << DuplicationCost << " block is:" << *BB << "\n"); + + // We are going to have to map operands from the original BB block into the + // PredBB block. Evaluate PHI nodes in BB. + DenseMap<Instruction*, Value*> ValueMapping; + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); + + // Clone the non-phi instructions of BB into PredBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; BI != BB->end(); ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + // Check to see if the targets of the branch had PHI nodes. If so, we need to + // add entries to the PHI nodes for branch from PredBB now. + BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, + ValueMapping); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, + ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); + } + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); + } + + // PredBB no longer jumps to BB, remove entries in the PHI node for the edge + // that we nuked. + BB->removePredecessor(PredBB); + + // Remove the unconditional branch at the end of the PredBB block. + OldPredBranch->eraseFromParent(); + + ++NumDupes; + return true; +} + + diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d6daeca..756fbf3 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -35,8 +35,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -46,8 +46,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" #include <algorithm> @@ -73,7 +73,7 @@ EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden, "global variables")); namespace { - struct VISIBILITY_HIDDEN LICM : public LoopPass { + struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid LICM() : LoopPass(&ID) {} @@ -91,6 +91,7 @@ namespace { AU.addRequired<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreserved<DominanceFrontier>(); + AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { @@ -338,7 +339,6 @@ void LICM::SinkRegion(DomTreeNode *N) { } } - /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -389,9 +389,13 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // Don't hoist loads which have may-aliased stores in loop. unsigned Size = 0; if (LI->getType()->isSized()) - Size = AA->getTargetData().getTypeStoreSize(LI->getType()); + Size = AA->getTypeStoreSize(LI->getType()); return !pointerInvalidatedByLoop(LI->getOperand(0), Size); } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { + if (isa<DbgStopPointInst>(CI)) { + // Don't hoist/sink dbgstoppoints, we handle them separately + return false; + } // Handle obvious cases efficiently. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) @@ -465,7 +469,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DOUT << "LICM sinking instruction: " << I; + DEBUG(errs() << "LICM sinking instruction: " << I); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); @@ -482,22 +486,27 @@ void LICM::sink(Instruction &I) { if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); - if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(Context->getUndef(I.getType())); + // If I has users in unreachable blocks, eliminate. + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I.getType()->isVoidTy()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); ExitBlocks[0]->getInstList().insert(InsertPt, &I); } } else if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); - if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(Context->getUndef(I.getType())); + // If I has users in unreachable blocks, eliminate. + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I.getType()->isVoidTy()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to @@ -507,7 +516,7 @@ void LICM::sink(Instruction &I) { // Firstly, we create a stack object to hold the value... AllocaInst *AI = 0; - if (I.getType() != Type::VoidTy) { + if (!I.getType()->isVoidTy()) { AI = new AllocaInst(I.getType(), 0, I.getName(), I.getParent()->getParent()->getEntryBlock().begin()); CurAST->add(AI); @@ -593,7 +602,7 @@ void LICM::sink(Instruction &I) { if (AI) { std::vector<AllocaInst*> Allocas; Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST); } } } @@ -602,7 +611,8 @@ void LICM::sink(Instruction &I) { /// that is safe to hoist, this instruction is called to do the dirty work. /// void LICM::hoist(Instruction &I) { - DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I; + DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": " + << I << "\n"); // Remove the instruction from its current basic block... but don't delete the // instruction. @@ -623,7 +633,8 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. - if (!Inst.isTrapping()) return true; + if (Inst.isSafeToSpeculativelyExecute()) + return true; // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop @@ -635,12 +646,6 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { if (Inst.getParent() == CurLoop->getHeader()) return true; - // It's always safe to load from a global or alloca. - if (isa<LoadInst>(Inst)) - if (isa<AllocationInst>(Inst.getOperand(0)) || - isa<GlobalVariable>(Inst.getOperand(0))) - return true; - // Get the exit blocks for the current loop. SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); @@ -773,7 +778,7 @@ void LICM::PromoteValuesInLoop() { PromotedAllocas.reserve(PromotedValues.size()); for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); + PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST); } /// FindPromotableValuesInLoop - Check the current loop for stores to definite @@ -862,7 +867,7 @@ void LICM::FindPromotableValuesInLoop( for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); - DOUT << "LICM: Promoting value: " << *V << "\n"; + DEBUG(errs() << "LICM: Promoting value: " << *V << "\n"); } } diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 302cdec..5f93756 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -15,19 +15,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-delete" - #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" - using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); namespace { - class VISIBILITY_HIDDEN LoopDeletion : public LoopPass { + class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopDeletion() : LoopPass(&ID) {} @@ -38,9 +36,9 @@ namespace { bool SingleDominatingExit(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks); bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks); - bool IsLoopInvariantInst(Instruction *I, Loop* L); - + SmallVector<BasicBlock*, 4>& exitBlocks, + bool &Changed, BasicBlock *Preheader); + virtual void getAnalysisUsage(AnalysisUsage& AU) const { AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTree>(); @@ -84,32 +82,13 @@ bool LoopDeletion::SingleDominatingExit(Loop* L, return DT.dominates(exitingBlocks[0], latch); } -/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to -/// a loop, which is defined as being true if all of its operands are defined -/// outside of the loop. These instructions can be hoisted out of the loop -/// if their results are needed. This could be made more aggressive by -/// recursively checking the operands for invariance, but it's not clear that -/// it's worth it. -bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) { - // PHI nodes are not loop invariant if defined in the loop. - if (isa<PHINode>(I) && L->contains(I->getParent())) - return false; - - // The instruction is loop invariant if all of its operands are loop-invariant - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (!L->isLoopInvariant(I->getOperand(i))) - return false; - - // If we got this far, the instruction is loop invariant! - return true; -} - /// IsLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. bool LoopDeletion::IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks) { + SmallVector<BasicBlock*, 4>& exitBlocks, + bool &Changed, BasicBlock *Preheader) { BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock* exitBlock = exitBlocks[0]; @@ -122,7 +101,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, while (PHINode* P = dyn_cast<PHINode>(BI)) { Value* incoming = P->getIncomingValueForBlock(exitingBlock); if (Instruction* I = dyn_cast<Instruction>(incoming)) - if (!IsLoopInvariantInst(I, L)) + if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; BI++; @@ -181,15 +160,16 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { return false; // Finally, we have to check that the loop really is dead. - if (!IsLoopDead(L, exitingBlocks, exitBlocks)) - return false; + bool Changed = false; + if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); - const SCEV* S = SE.getBackedgeTakenCount(L); + const SCEV *S = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) - return false; + return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. @@ -199,18 +179,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. - - // Move simple loop-invariant expressions out of the loop, since they - // might be needed by the exit phis. - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) - for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); - BI != BE; ) { - Instruction* I = BI++; - if (!I->use_empty() && IsLoopInvariantInst(I, L)) - I->moveBefore(preheader->getTerminator()); - } - + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. + SE.forgetLoopBackedgeTakenCount(L); + // Connect the preheader directly to the exit block. TerminatorInst* TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); @@ -248,11 +222,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { (*LI)->dropAllReferences(); } - // Tell ScalarEvolution that the loop is deleted. Do this before - // deleting the loop so that ScalarEvolution can look at the loop - // to determine what it needs to clean up. - SE.forgetLoopBackedgeTakenCount(L); - // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove its @@ -273,8 +242,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); + Changed = true; NumDeleted++; - return true; + return Changed; } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 38e3a8b..5f9d370 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -51,7 +51,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-index-split" - #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" @@ -61,7 +60,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -73,8 +71,7 @@ STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted"); namespace { - class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass { - + class LoopIndexSplit : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopIndexSplit() : LoopPass(&ID) {} @@ -294,31 +291,33 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) { // Return V+1 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext* Context) { - Constant *One = Context->getConstantInt(V->getType(), 1, Sign); + LLVMContext &Context) { + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); } // Return V-1 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext* Context) { - Constant *One = Context->getConstantInt(V->getType(), 1, Sign); + LLVMContext &Context) { + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); } // Return min(V1, V1) static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp", InsertPt); + Value *C = new ICmpInst(InsertPt, + Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + V1, V2, "lsp"); return SelectInst::Create(C, V1, V2, "lsp", InsertPt); } // Return max(V1, V2) static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp", InsertPt); + Value *C = new ICmpInst(InsertPt, + Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + V1, V2, "lsp"); return SelectInst::Create(C, V2, V1, "lsp", InsertPt); } @@ -427,15 +426,15 @@ bool LoopIndexSplit::processOneIterationLoop() { // c1 = icmp uge i32 SplitValue, StartValue // c2 = icmp ult i32 SplitValue, ExitValue // and i32 c1, c2 - Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ? + Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, - SplitValue, StartValue, "lisplit", BR); + SplitValue, StartValue, "lisplit"); CmpInst::Predicate C2P = ExitCondition->getPredicate(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - if (LatchBR->getOperand(0) != Header) + if (LatchBR->getOperand(1) != Header) C2P = CmpInst::getInversePredicate(C2P); - Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR); + Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit"); Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR); SplitCondition->replaceAllUsesWith(NSplitCond); @@ -491,6 +490,8 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) { EBR->setSuccessor(1, T); } + LLVMContext &Context = Op.getContext(); + // New upper and lower bounds. Value *NLB = NULL; Value *NUB = NULL; @@ -698,7 +699,8 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, E = df_end(DN); DI != E; ++DI) { BasicBlock *BB = DI->getBlock(); WorkList.push_back(BB); - BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy)); + BB->replaceAllUsesWith(UndefValue::get( + Type::getLabelTy(DeadBB->getContext()))); } while (!WorkList.empty()) { @@ -877,6 +879,8 @@ bool LoopIndexSplit::splitLoop() { BasicBlock *ExitingBlock = ExitCondition->getParent(); if (!cleanBlock(ExitingBlock)) return false; + LLVMContext &Context = Header->getContext(); + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator()); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 1f7892a..70c69bb 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -32,7 +32,7 @@ using namespace llvm; STATISTIC(NumRotated, "Number of loops rotated"); namespace { - class VISIBILITY_HIDDEN RenameData { + class RenameData { public: RenameData(Instruction *O, Value *P, Instruction *H) : Original(O), PreHeader(P), Header(H) { } @@ -42,8 +42,7 @@ namespace { Instruction *Header; // New header replacement }; - class VISIBILITY_HIDDEN LoopRotate : public LoopPass { - + class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopRotate() : LoopPass(&ID) {} @@ -178,6 +177,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // Now, this loop is suitable for rotation. + // Anything ScalarEvolution may know about this loop or the PHI nodes + // in its header will soon be invalidated. + if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) + SE->forgetLoopBackedgeTakenCount(L); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -435,7 +439,8 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { // Right now original pre-header has two successors, new header and // exit block. Insert new block between original pre-header and // new header such that loop's new pre-header has only one successor. - BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph", + BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(), + "bb.nph", OrigHeader->getParent(), NewHeader); LoopInfo &LI = LPM.getAnalysis<LoopInfo>(); @@ -511,26 +516,30 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { DF->addBasicBlock(L->getHeader(), LatchSet); } - // If a loop block dominates new loop latch then its frontier is - // new header and Exit. + // If a loop block dominates new loop latch then add to its frontiers + // new header and Exit and remove new latch (which is equal to original + // header). BasicBlock *NewLatch = L->getLoopLatch(); - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *B = *BI; - if (DT->dominates(B, NewLatch)) { - DominanceFrontier::iterator BDFI = DF->find(B); - if (BDFI != DF->end()) { - DominanceFrontier::DomSetType &BSet = BDFI->second; - BSet = BDFI->second; - BSet.clear(); - BSet.insert(L->getHeader()); - BSet.insert(Exit); - } else { - DominanceFrontier::DomSetType BSet; - BSet.insert(L->getHeader()); - BSet.insert(Exit); - DF->addBasicBlock(B, BSet); + + assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader"); + + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { + for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); + BI != BE; ++BI) { + BasicBlock *B = *BI; + if (DT->dominates(B, NewLatch)) { + DominanceFrontier::iterator BDFI = DF->find(B); + if (BDFI != DF->end()) { + DominanceFrontier::DomSetType &BSet = BDFI->second; + BSet.erase(NewLatch); + BSet.insert(L->getHeader()); + BSet.insert(Exit); + } else { + DominanceFrontier::DomSetType BSet; + BSet.insert(L->getHeader()); + BSet.insert(Exit); + DF->addBasicBlock(B, BSet); + } } } } @@ -538,22 +547,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { // Preserve canonical loop form, which means Exit block should // have only one predecessor. - BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this); - - // Preserve LCSSA. - for (BasicBlock::iterator I = Exit->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { - unsigned N = PN->getNumIncomingValues(); - for (unsigned index = 0; index != N; ++index) - if (PN->getIncomingBlock(index) == NExit) { - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(), - NExit->begin()); - NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch()); - PN->setIncomingValue(index, NewPN); - PN->setIncomingBlock(index, NExit); - break; - } - } + SplitEdge(L->getLoopLatch(), Exit, this); assert(NewHeader && L->getHeader() == NewHeader && "Invalid loop header after loop rotation"); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 046fed3..d8f6cc1 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,7 +24,6 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" @@ -38,9 +37,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; @@ -64,26 +63,26 @@ namespace { /// IVInfo - This structure keeps track of one IV expression inserted during /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. - struct VISIBILITY_HIDDEN IVExpr { - const SCEV* Stride; - const SCEV* Base; + struct IVExpr { + const SCEV *Stride; + const SCEV *Base; PHINode *PHI; - IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi) + IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) : Stride(stride), Base(base), PHI(phi) {} }; /// IVsOfOneStride - This structure keeps track of all IV expression inserted /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct VISIBILITY_HIDDEN IVsOfOneStride { + struct IVsOfOneStride { std::vector<IVExpr> IVs; - void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) { + void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { IVs.push_back(IVExpr(Stride, Base, PHI)); } }; - class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { + class LoopStrengthReduce : public LoopPass { IVUsers *IU; LoopInfo *LI; DominatorTree *DT; @@ -92,11 +91,11 @@ namespace { /// IVsByStride - Keep track of all IVs that have been inserted for a /// particular stride. - std::map<const SCEV*, IVsOfOneStride> IVsByStride; + std::map<const SCEV *, IVsOfOneStride> IVsByStride; /// StrideNoReuse - Keep track of all the strides whose ivs cannot be /// reused (nor should they be rewritten to reuse other strides). - SmallSet<const SCEV*, 4> StrideNoReuse; + SmallSet<const SCEV *, 4> StrideNoReuse; /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. @@ -134,7 +133,7 @@ namespace { private: ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, - const SCEV* const * &CondStride); + const SCEV *const * &CondStride); void OptimizeIndvars(Loop *L); void OptimizeLoopCountIV(Loop *L); @@ -150,16 +149,16 @@ namespace { IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* const * &CondStride); + const SCEV *const * &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&, + const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, IVExpr&, const Type*, const std::vector<BasedUser>& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector<BasedUser>& UsersToProcess); bool ValidOffset(bool, int64_t, int64_t, const std::vector<BasedUser>& UsersToProcess); - const SCEV* CollectIVUsers(const SCEV* const &Stride, + const SCEV *CollectIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -169,11 +168,11 @@ namespace { const std::vector<BasedUser> &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - const SCEV* Stride); + const SCEV *Stride); void PrepareToStrengthReduceFully( std::vector<BasedUser> &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter); void PrepareToStrengthReduceFromSmallerStride( @@ -183,13 +182,13 @@ namespace { Instruction *PreInsertPt); void PrepareToStrengthReduceWithNewPhi( std::vector<BasedUser> &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, Value *CommonBaseV, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter); - void StrengthReduceStridedIVUsers(const SCEV* const &Stride, + void StrengthReduceStridedIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L); void DeleteTriviallyDeadInstructions(); @@ -233,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { /// containsAddRecFromDifferentLoop - Determine whether expression S involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. if (isa<SCEVConstant>(S)) return false; @@ -248,7 +247,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop)) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -328,7 +327,7 @@ namespace { /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this /// field. - const SCEV* Base; + const SCEV *Base; /// Inst - The instruction using the induction variable. Instruction *Inst; @@ -341,7 +340,7 @@ namespace { /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that /// must be added inside the loop. - const SCEV* Imm; + const SCEV *Imm; /// Phi - The induction variable that performs the striding that /// should be used for this user. @@ -363,13 +362,13 @@ namespace { // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. - void RewriteInstructionToUseNewBase(const SCEV* const &NewBase, + void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, SmallVectorImpl<WeakVH> &DeadInsts); - Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase, + Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -379,12 +378,12 @@ namespace { } void BasedUser::dump() const { - cerr << " Base=" << *Base; - cerr << " Imm=" << *Imm; - cerr << " Inst: " << *Inst; + errs() << " Base=" << *Base; + errs() << " Imm=" << *Imm; + errs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -408,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); - const SCEV* NewValSCEV = SE->getUnknown(Base); + const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); @@ -423,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, // value of NewBase in the case that it's a diffferent instruction from // the PHI that NewBase is computed from, or null otherwise. // -void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, @@ -460,9 +459,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DOUT << " Replacing with "; - DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false)); - DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + DEBUG(errs() << " Replacing with "); + DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false)); + DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); return; } @@ -483,43 +483,45 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, // loop because multiple copies sometimes do useful sinking of code in // that case(?). Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); + BasicBlock *PHIPred = PN->getIncomingBlock(i); if (L->contains(OldLoc->getParent())) { // If this is a critical edge, split the edge so that we do not insert // the code on all predecessor/successor paths. We do this unless this // is the canonical backedge for this loop, as this can make some // inserted code be in an illegal position. - BasicBlock *PHIPred = PN->getIncomingBlock(i); if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. - SplitCriticalEdge(PHIPred, PN->getParent(), P, false); + BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), + P, false); // Next step: move the basic block. In particular, if the PHI node // is outside of the loop, and PredTI is in the loop, we want to // move the block to be immediately before the PHI block, not // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN->getParent())) { - BasicBlock *NewBB = PN->getIncomingBlock(i); + if (L->contains(PHIPred) && !L->contains(PN->getParent())) NewBB->moveBefore(PN->getParent()); - } // Splitting the edge can reduce the number of PHI entries we have. e = PN->getNumIncomingValues(); + PHIPred = NewBB; + i = PN->getBasicBlockIndex(PHIPred); } } - Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; + Value *&Code = InsertedCode[PHIPred]; if (!Code) { // Insert the code into the end of the predecessor block. Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? - PN->getIncomingBlock(i)->getTerminator() : + PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), Rewriter, InsertPt, L, LI); - DOUT << " Changing PHI use to "; - DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false)); - DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + DEBUG(errs() << " Changing PHI use to "); + DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); + DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); } // Replace the use of the operand Value with the new Phi we just created. @@ -535,7 +537,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, /// fitsInAddressMode - Return true if V can be subsumed within an addressing /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, const TargetLowering *TLI, bool HasBaseReg) { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { int64_t VC = SC->getValue()->getSExtValue(); @@ -567,12 +569,12 @@ static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are /// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, +static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV*, 4> NewOps; + SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -590,10 +592,10 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, Val = SE->getAddExpr(NewOps); } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { // Try to pull immediates out of the start value of nested addrec's. - const SCEV* Start = SARE->getStart(); + const SCEV *Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -609,15 +611,15 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, /// Accumulate these immediate values into the Imm value. static void MoveImmediateValues(const TargetLowering *TLI, const Type *AccessTy, - const SCEV* &Val, const SCEV* &Imm, + const SCEV *&Val, const SCEV *&Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV*, 4> NewOps; + SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV* NewOp = SAE->getOperand(i); + const SCEV *NewOp = SAE->getOperand(i); MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); if (!NewOp->isLoopInvariant(L)) { @@ -636,11 +638,11 @@ static void MoveImmediateValues(const TargetLowering *TLI, return; } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { // Try to pull immediates out of the start value of nested addrec's. - const SCEV* Start = SARE->getStart(); + const SCEV *Start = SARE->getStart(); MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -651,8 +653,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType()); - const SCEV* NewOp = SME->getOperand(1); + const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); + const SCEV *NewOp = SME->getOperand(1); MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); // If we extracted something out of the subexpressions, see if we can @@ -687,7 +689,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, static void MoveImmediateValues(const TargetLowering *TLI, Instruction *User, - const SCEV* &Val, const SCEV* &Imm, + const SCEV *&Val, const SCEV *&Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { const Type *AccessTy = getAccessType(User); @@ -697,19 +699,19 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(SmallVector<const SCEV*, 16> &SubExprs, - const SCEV* Expr, +static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, + const SCEV *Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) SeparateSubExprs(SubExprs, AE->getOperand(j), SE); } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { - const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType()); + const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType()); if (SARE->getOperand(0) == Zero) { SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -733,7 +735,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; /// not remove anything. This looks for things like (a+b+c) and /// (a+c+d) and computes the common (a+c) subexpression. The common expression /// is *removed* from the Bases and returned. -static const SCEV* +static const SCEV * RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, ScalarEvolution *SE, Loop *L, const TargetLowering *TLI) { @@ -741,9 +743,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Only one use? This is a very common case, so we handle it specially and // cheaply. - const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); - const SCEV* Result = Zero; - const SCEV* FreeResult = Zero; + const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); + const SCEV *Result = Zero; + const SCEV *FreeResult = Zero; if (NumUses == 1) { // If the use is inside the loop, use its base, regardless of what it is: // it is clearly shared across all the IV's. If the use is outside the loop @@ -759,13 +761,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Also track whether all uses of each expression can be moved into an // an addressing mode "for free"; such expressions are left within the loop. // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - std::map<const SCEV*, SubExprUseData> SubExpressionUseData; + std::map<const SCEV *, SubExprUseData> SubExpressionUseData; // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. - SmallVector<const SCEV*, 16> UniqueSubExprs; + SmallVector<const SCEV *, 16> UniqueSubExprs; - SmallVector<const SCEV*, 16> SubExprs; + SmallVector<const SCEV *, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -809,7 +811,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Now that we know how many times each is used, build Result. Iterate over // UniqueSubexprs so that we have a stable ordering. for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV*, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(UniqueSubExprs[i]); assert(I != SubExpressionUseData.end() && "Entry not found?"); if (I->second.Count == NumUsesInsideLoop) { // Found CSE! @@ -853,7 +855,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, if (FreeResult != Zero) { SeparateSubExprs(SubExprs, FreeResult, SE); for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV*, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(SubExprs[j]); SubExpressionUseData.erase(I); } @@ -902,7 +904,8 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = Type::VoidTy; + const Type *AccessTy = + Type::getVoidTy(UsersToProcess[i].Inst->getContext()); if (isAddressUse(UsersToProcess[i].Inst, UsersToProcess[i].OperandValToReplace)) AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -934,7 +937,8 @@ bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = Type::VoidTy; + const Type *AccessTy = + Type::getVoidTy(UsersToProcess[i].Inst->getContext()); if (isAddressUse(UsersToProcess[i].Inst, UsersToProcess[i].OperandValToReplace)) AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -982,10 +986,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, /// be folded into the addressing mode, nor even that the factor be constant; /// a multiply (executed once) outside the loop is better than another IV /// within. Well, usually. -const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, +const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV* const &Stride, + const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { if (StrideNoReuse.count(Stride)) @@ -995,7 +999,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV*, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || StrideNoReuse.count(SI->first)) @@ -1048,7 +1052,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // an existing IV if we can. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV*, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; @@ -1068,7 +1072,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // -1*old. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV*, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end()) continue; @@ -1097,7 +1101,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. -static bool isNonConstantNegative(const SCEV* const &Expr) { +static bool isNonConstantNegative(const SCEV *const &Expr) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); if (!Mul) return false; @@ -1114,7 +1118,7 @@ static bool isNonConstantNegative(const SCEV* const &Expr) { /// of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. -const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, +const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -1145,7 +1149,7 @@ const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. - const SCEV* CommonExprs = + const SCEV *CommonExprs = RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); // Next, figure out what we can represent in the immediate fields of @@ -1211,7 +1215,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( const std::vector<BasedUser> &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - const SCEV* Stride) { + const SCEV *Stride) { if (!EnableFullLSRMode) return false; @@ -1248,7 +1252,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); const Instruction *Inst = UsersToProcess[i].Inst; const Type *AccessTy = getAccessType(Inst); - const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); + const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); if (!Diff->isZero() && (!AllUsesAreAddresses || !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) @@ -1282,7 +1286,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( /// /// Return the created phi node. /// -static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, +static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &Rewriter) { @@ -1302,7 +1306,7 @@ static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, // If the stride is negative, insert a sub instead of an add for the // increment. bool isNegative = isNonConstantNegative(Step); - const SCEV* IncAmount = Step; + const SCEV *IncAmount = Step; if (isNegative) IncAmount = Rewriter.SE.getNegativeSCEV(Step); @@ -1341,13 +1345,13 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { // loop before users outside of the loop with a particular base. // // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so + // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so // we don't have anything to do a '<' comparison on. Because we think the // number of uses is small, do a horrible bubble sort which just relies on // ==. for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { // Get a base value. - const SCEV* Base = UsersToProcess[i].Base; + const SCEV *Base = UsersToProcess[i].Base; // Compact everything with this base to be consecutive with this one. for (unsigned j = i+1; j != e; ++j) { @@ -1366,11 +1370,11 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { void LoopStrengthReduce::PrepareToStrengthReduceFully( std::vector<BasedUser> &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter) { - DOUT << " Fully reducing all users\n"; + DEBUG(errs() << " Fully reducing all users\n"); // Rewrite the UsersToProcess records, creating a separate PHI for each // unique Base value. @@ -1379,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully( // TODO: The uses are grouped by base, but not sorted. We arbitrarily // pick the first Imm value here to start with, and adjust it for the // other uses. - const SCEV* Imm = UsersToProcess[i].Imm; - const SCEV* Base = UsersToProcess[i].Base; - const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm); + const SCEV *Imm = UsersToProcess[i].Imm; + const SCEV *Base = UsersToProcess[i].Base; + const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm); PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, PreheaderRewriter); // Loop over all the users with the same base. @@ -1413,13 +1417,13 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess, void LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( std::vector<BasedUser> &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, Value *CommonBaseV, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter) { - DOUT << " Inserting new PHI:\n"; + DEBUG(errs() << " Inserting new PHI:\n"); PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV), Stride, IVIncInsertPt, L, @@ -1432,9 +1436,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Phi = Phi; - DOUT << " IV="; - DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false)); - DOUT << "\n"; + DEBUG(errs() << " IV="); + DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false)); + DEBUG(errs() << "\n"); } /// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to @@ -1447,8 +1451,8 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride( Value *CommonBaseV, const IVExpr &ReuseIV, Instruction *PreInsertPt) { - DOUT << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride - << " and BASE " << *ReuseIV.Base << "\n"; + DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE " + << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n"); // All the users will share the reused IV. for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) @@ -1490,7 +1494,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, +void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L) { // If all the users are moved to another stride, then there is nothing to do. @@ -1513,7 +1517,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // move information from the Base field to the Imm field, until we eventually // have the full access expression to rewrite the use. std::vector<BasedUser> UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, + const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -1531,9 +1535,11 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // If all uses are addresses, consider sinking the immediate part of the // common expression back into uses if they can fit in the immediate fields. if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV* NewCommon = CommonExprs; - const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE); + const SCEV *NewCommon = CommonExprs; + const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); + MoveImmediateValues(TLI, Type::getVoidTy( + L->getLoopPreheader()->getContext()), + NewCommon, Imm, true, L, SE); if (!Imm->isZero()) { bool DoSink = true; @@ -1548,11 +1554,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, if (GV || Offset) // Pass VoidTy as the AccessTy to be conservative, because // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy, + DoSink = IsImmFoldedIntoAddrMode(GV, Offset, + Type::getVoidTy(L->getLoopPreheader()->getContext()), UsersToProcess, TLI); if (DoSink) { - DOUT << " Sinking " << *Imm << " back down into uses\n"; + DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n"); for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); CommonExprs = NewCommon; @@ -1564,9 +1571,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // Now that we know what we need to do, insert the PHI node itself. // - DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << "\n"; + DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " + << *Stride << ":\n" + << " Common base: " << *CommonExprs << "\n"); SCEVExpander Rewriter(*SE); SCEVExpander PreheaderRewriter(*SE); @@ -1576,11 +1583,13 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, BasicBlock *LatchBlock = L->getLoopLatch(); Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - Value *CommonBaseV = Context->getNullValue(ReplacedTy); + Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), - SE->getIntegerSCEV(0, Type::Int32Ty), + const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); + IVExpr ReuseIV(SE->getIntegerSCEV(0, + Type::getInt32Ty(Preheader->getContext())), + SE->getIntegerSCEV(0, + Type::getInt32Ty(Preheader->getContext())), 0); /// Choose a strength-reduction strategy and prepare for it by creating @@ -1618,7 +1627,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // strength-reduced forms. This outer loop handles all bases, the inner // loop handles all users of a particular base. while (!UsersToProcess.empty()) { - const SCEV* Base = UsersToProcess.back().Base; + const SCEV *Base = UsersToProcess.back().Base; Instruction *Inst = UsersToProcess.back().Inst; // Emit the code for Base into the preheader. @@ -1626,17 +1635,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, if (!Base->isZero()) { BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - DOUT << " INSERTING code for BASE = " << *Base << ":"; + DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":"); if (BaseV->hasName()) - DOUT << " Result value name = %" << BaseV->getNameStr(); - DOUT << "\n"; + DEBUG(errs() << " Result value name = %" << BaseV->getName()); + DEBUG(errs() << "\n"); // If BaseV is a non-zero constant, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. We // do this by forcing a BitCast (noop cast) to be inserted into the // preheader in this case. if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - !isa<Instruction>(BaseV)) { + isa<Constant>(BaseV)) { // We want this constant emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", @@ -1650,15 +1659,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // FIXME: Use emitted users to emit other users. BasedUser &User = UsersToProcess.back(); - DOUT << " Examining "; + DEBUG(errs() << " Examining "); if (User.isUseOfPostIncrementedValue) - DOUT << "postinc"; + DEBUG(errs() << "postinc"); else - DOUT << "preinc"; - DOUT << " use "; - DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace, + DEBUG(errs() << "preinc"); + DEBUG(errs() << " use "); + DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace, /*PrintType=*/false)); - DOUT << " in Inst: " << *(User.Inst); + DEBUG(errs() << " in Inst: " << *User.Inst); // If this instruction wants to use the post-incremented value, move it // after the post-inc and use its value instead of the PHI. @@ -1673,7 +1682,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, User.Inst->moveBefore(IVIncInsertPt); } - const SCEV* RewriteExpr = SE->getUnknown(RewriteOp); + const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); if (SE->getEffectiveSCEVType(RewriteOp->getType()) != SE->getEffectiveSCEVType(ReplacedTy)) { @@ -1705,7 +1714,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // The base has been used to initialize the PHI node but we don't want // it here. if (!ReuseIV.Base->isZero()) { - const SCEV* typedBase = ReuseIV.Base; + const SCEV *typedBase = ReuseIV.Base; if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { // It's possible the original IV is a larger type than the new IV, @@ -1770,10 +1779,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, /// set the IV user and stride information and return true, otherwise return /// false. bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* const * &CondStride) { + const SCEV *const * &CondStride) { for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e && !CondUse; ++Stride) { - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[Stride]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -1800,7 +1809,7 @@ namespace { const ScalarEvolution *SE; explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) { + bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) { const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); if (LHSC && RHSC) { @@ -1843,14 +1852,14 @@ namespace { /// if (v1 < 30) goto loop ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, - const SCEV* const* &CondStride) { + const SCEV *const* &CondStride) { // If there's only one stride in the loop, there's nothing to do here. if (IU->StrideOrder.size() < 2) return Cond; // If there are other users of the condition's stride, don't bother // trying to change the condition because the stride will still // remain. - std::map<const SCEV*, IVUsersOfOneStride *>::iterator I = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator I = IU->IVUsesByStride.find(*CondStride); if (I == IU->IVUsesByStride.end() || I->second->Users.size() != 1) @@ -1867,11 +1876,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, const Type *NewCmpTy = NULL; unsigned TyBits = SE->getTypeSizeInBits(CmpTy); unsigned NewTyBits = 0; - const SCEV* *NewStride = NULL; + const SCEV **NewStride = NULL; Value *NewCmpLHS = NULL; Value *NewCmpRHS = NULL; int64_t Scale = 1; - const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy); + const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { int64_t CmpVal = C->getValue().getSExtValue(); @@ -1883,7 +1892,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Look for a suitable stride / iv as replacement. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); if (!isa<SCEVConstant>(SI->first)) continue; @@ -1942,7 +1951,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewCmpTy = NewCmpLHS->getType(); NewTyBits = SE->getTypeSizeInBits(NewCmpTy); - const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits); + const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits); if (RequiresTypeConversion(NewCmpTy, CmpTy)) { // Check if it is possible to rewrite it using // an iv / stride of a smaller integer type. @@ -1963,7 +1972,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector<BasedUser> UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -1987,10 +1996,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewStride = &IU->StrideOrder[i]; if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal); + NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal); - NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), @@ -2019,9 +2028,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Create a new compare instruction using new stride / iv. ICmpInst *OldCond = Cond; // Insert new compare instruction. - Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond", - OldCond); + Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, + L->getHeader()->getName() + ".termcond"); // Remove the old compare instruction. The old indvar is probably dead too. DeadInsts.push_back(CondUse->getOperandValToReplace()); @@ -2098,13 +2106,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) @@ -2117,13 +2125,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, if (Max->getNumOperands() != 2) return Cond; - const SCEV* MaxLHS = Max->getOperand(0); - const SCEV* MaxRHS = Max->getOperand(1); + const SCEV *MaxLHS = Max->getOperand(0); + const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; // Check the relevant induction variable for conformance to // the pattern. - const SCEV* IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || @@ -2152,7 +2160,7 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -2169,13 +2177,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, /// inside the loop then try to eliminate the cast opeation. void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return; - + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[Stride]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); if (!isa<SCEVConstant>(SI->first)) @@ -2209,7 +2217,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { if (TLI) { // If target does not support DestTy natively then do not apply // this transformation. - MVT DVT = TLI->getValueType(DestTy); + EVT DVT = TLI->getValueType(DestTy); if (!TLI->isTypeLegal(DVT)) continue; } @@ -2234,7 +2242,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2258,7 +2266,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2294,6 +2302,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. ExitingBlock = LatchBlock; @@ -2305,7 +2314,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Search IVUsesByStride to find Cond's IVUse if there is one. IVStrideUse *CondUse = 0; - const SCEV* const *CondStride = 0; + const SCEV *const *CondStride = 0; ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. @@ -2335,7 +2344,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; ++NewStride) { - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride) continue; @@ -2349,7 +2358,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector<BasedUser> UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2410,7 +2419,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return; @@ -2439,9 +2448,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // Handle only tests for equality for the moment, and only stride 1. if (Cond->getPredicate() != CmpInst::ICMP_EQ) return; - const SCEV* IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One) return; // If the RHS of the comparison is defined inside the loop, the rewrite @@ -2497,7 +2506,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0); + Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); @@ -2516,11 +2525,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { Changed = false; if (!IU->IVUsesByStride.empty()) { -#ifndef NDEBUG - DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart() - << "\" "; - DEBUG(L->dump()); -#endif + DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() + << "\" "; + L->dump()); // Sort the StrideOrder so we process larger strides first. std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), @@ -2557,7 +2564,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // strides deterministic - not dependent on map order. for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[Stride]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); // FIXME: Generalize to non-affine IV's. diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp index 23757cd..837ec59 100644 --- a/lib/Transforms/Scalar/LoopUnroll.cpp +++ b/lib/Transforms/Scalar/LoopUnroll.cpp @@ -17,9 +17,9 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <climits> @@ -39,7 +39,7 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, "-unroll-threshold loop size is reached.")); namespace { - class VISIBILITY_HIDDEN LoopUnroll : public LoopPass { + class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopUnroll() : LoopPass(&ID) {} @@ -96,10 +96,7 @@ static unsigned ApproximateLoopSize(const Loop *L) { // is higher than other instructions. Here 3 and 10 are magic // numbers that help one isolated test case from PR2067 without // negatively impacting measured benchmarks. - if (isa<IntrinsicInst>(I)) - Size = Size + 3; - else - Size = Size + 10; + Size += isa<IntrinsicInst>(I) ? 3 : 10; } else { ++Size; } @@ -118,51 +115,48 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); BasicBlock *Header = L->getHeader(); - DOUT << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"; + DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName() + << "] Loop %" << Header->getName() << "\n"); + (void)Header; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); unsigned Count = UnrollCount; - + // Automatically select an unroll count. if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under + // try to find greatest modulo of the trip count which is still under // threshold value. - if (TripCount != 0) { - Count = TripCount; - } else { + if (TripCount == 0) return false; - } + Count = TripCount; } // Enforce the threshold. if (UnrollThreshold != NoThreshold) { unsigned LoopSize = ApproximateLoopSize(L); - DOUT << " Loop Size = " << LoopSize << "\n"; + DEBUG(errs() << " Loop Size = " << LoopSize << "\n"); uint64_t Size = (uint64_t)LoopSize*Count; if (TripCount != 1 && Size > UnrollThreshold) { - DOUT << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << UnrollThreshold << "\n"; - if (UnrollAllowPartial) { - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = UnrollThreshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; - } - if (Count < 2) { - DOUT << " could not unroll partially\n"; - return false; - } else { - DOUT << " partially unrolling with count: " << Count << "\n"; - } - } else { - DOUT << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"; + DEBUG(errs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << UnrollThreshold << "\n"); + if (!UnrollAllowPartial) { + DEBUG(errs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + return false; + } + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = UnrollThreshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) { + Count--; + } + if (Count < 2) { + DEBUG(errs() << " could not unroll partially\n"); return false; } + DEBUG(errs() << " partially unrolling with count: " << Count << "\n"); } } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index de5eedf..f6de362 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -34,6 +34,7 @@ #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" @@ -44,8 +45,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> using namespace llvm; @@ -56,12 +57,14 @@ STATISTIC(NumSelects , "Number of selects unswitched"); STATISTIC(NumTrivial , "Number of unswitches that are trivial"); STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); +// The specific value of 50 here was chosen based only on intuition and a +// few specific examples. static cl::opt<unsigned> Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), - cl::init(10), cl::Hidden); + cl::init(50), cl::Hidden); namespace { - class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass { + class LoopUnswitch : public LoopPass { LoopInfo *LI; // Loop information LPPassManager *LPM; @@ -112,6 +115,10 @@ namespace { private: + virtual void releaseMemory() { + UnswitchedVals.clear(); + } + /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, /// remove it. void RemoveLoopFromWorklist(Loop *L) { @@ -168,8 +175,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { if (isa<Constant>(Cond)) return 0; // TODO: Handle: br (VARIANT|INVARIANT). - // TODO: Hoist simple expressions out of loops. - if (L->isLoopInvariant(Cond)) return Cond; + + // Hoist simple values out. + if (L->makeLoopInvariant(Cond, Changed)) + return Cond; if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) if (BO->getOpcode() == Instruction::And || @@ -214,6 +223,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { /// and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; + LLVMContext &Context = currentLoop->getHeader()->getContext(); // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this @@ -231,7 +241,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); if (LoopCond && UnswitchIfProfitable(LoopCond, - Context->getConstantIntTrue())) { + ConstantInt::getTrue(Context))) { ++NumBranches; return true; } @@ -261,7 +271,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); if (LoopCond && UnswitchIfProfitable(LoopCond, - Context->getConstantIntTrue())) { + ConstantInt::getTrue(Context))) { ++NumSelects; return true; } @@ -335,6 +345,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, BasicBlock **LoopExit) { BasicBlock *Header = currentLoop->getHeader(); TerminatorInst *HeaderTerm = Header->getTerminator(); + LLVMContext &Context = Header->getContext(); BasicBlock *LoopExitBB = 0; if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { @@ -349,10 +360,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(0)))) { - if (Val) *Val = Context->getConstantIntTrue(); + if (Val) *Val = ConstantInt::getTrue(Context); } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(1)))) { - if (Val) *Val = Context->getConstantIntFalse(); + if (Val) *Val = ConstantInt::getFalse(Context); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) { // If this isn't a switch on Cond, we can't handle it. @@ -398,29 +409,14 @@ unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) { if (IsTrivialUnswitchCondition(LIC)) return 0; - // FIXME: This is really overly conservative. However, more liberal - // estimations have thus far resulted in excessive unswitching, which is bad - // both in compile time and in code size. This should be replaced once - // someone figures out how a good estimation. - return currentLoop->getBlocks().size(); - - unsigned Cost = 0; - // FIXME: this is brain dead. It should take into consideration code - // shrinkage. + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities. + CodeMetrics Metrics; for (Loop::block_iterator I = currentLoop->block_begin(), E = currentLoop->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - // Do not include empty blocks in the cost calculation. This happen due to - // loop canonicalization and will be removed. - if (BB->begin() == BasicBlock::iterator(BB->getTerminator())) - continue; - - // Count basic blocks. - ++Cost; - } - - return Cost; + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + return Metrics.NumInsts; } /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when @@ -445,9 +441,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ // FIXME: this should estimate growth by the amount of code shared by the // resultant unswitched loops. // - DOUT << "NOT unswitching loop %" - << currentLoop->getHeader()->getName() << ", cost too high: " - << currentLoop->getBlocks().size() << "\n"; + DEBUG(errs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"); return false; } @@ -506,14 +502,20 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; - if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty) - BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt); - else if (Val != Context->getConstantIntTrue()) + if (!isa<ConstantInt>(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp"); + else if (Val != ConstantInt::getTrue(Val->getContext())) // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); // Insert the new branch. - BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + + // If either edge is critical, split it. This helps preserve LoopSimplify + // form for enclosing loops. + SplitCriticalEdge(BI, 0, this); + SplitCriticalEdge(BI, 1, this); } /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable @@ -524,10 +526,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock) { - DOUT << "loop-unswitch: Trivial-Unswitch loop %" - << loopHeader->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << L->getHeader()->getParent()->getName() - << " on cond: " << *Val << " == " << *Cond << "\n"; + DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %" + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << L->getHeader()->getParent()->getName() + << " on cond: " << *Val << " == " << *Cond << "\n"); // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change loopPreheader to have a @@ -570,47 +572,11 @@ void LoopUnswitch::SplitExitEdges(Loop *L, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; - std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); - - for (unsigned j = 0, e = Preds.size(); j != e; ++j) { - BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this); - BasicBlock* StartBlock = Preds[j]; - BasicBlock* EndBlock; - if (NewExitBlock->getSinglePredecessor() == ExitBlock) { - EndBlock = NewExitBlock; - NewExitBlock = EndBlock->getSinglePredecessor(); - } else { - EndBlock = ExitBlock; - } - - std::set<PHINode*> InsertedPHIs; - PHINode* OldLCSSA = 0; - for (BasicBlock::iterator I = EndBlock->begin(); - (OldLCSSA = dyn_cast<PHINode>(I)); ++I) { - Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock); - PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(), - OldLCSSA->getName() + ".us-lcssa", - NewExitBlock->getTerminator()); - NewLCSSA->addIncoming(OldValue, StartBlock); - OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock), - NewLCSSA); - InsertedPHIs.insert(NewLCSSA); - } - - BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI(); - for (BasicBlock::iterator I = NewExitBlock->begin(); - (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0; - ++I) { - PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(), - OldLCSSA->getName() + ".us-lcssa", - InsertPt); - OldLCSSA->replaceAllUsesWith(NewLCSSA); - NewLCSSA->addIncoming(OldLCSSA, NewExitBlock); - } - - } + SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), + pred_end(ExitBlock)); + SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), + ".us-lcssa", this); } - } /// UnswitchNontrivialCondition - We determined that the loop is profitable @@ -619,10 +585,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L, void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); - DOUT << "loop-unswitch: Unswitching loop %" - << loopHeader->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << F->getName() - << " when '" << *Val << "' == " << *LIC << "\n"; + DEBUG(errs() << "loop-unswitch: Unswitching loop %" + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << F->getName() + << " when '" << *Val << "' == " << *LIC << "\n"); LoopBlocks.clear(); NewBlocks.clear(); @@ -745,7 +711,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DOUT << "Replace with '" << *V << "': " << *I; + DEBUG(errs() << "Replace with '" << *V << "': " << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -788,7 +754,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, // dominates the latch). LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); Pred->getTerminator()->eraseFromParent(); - new UnreachableInst(Pred); + new UnreachableInst(BB->getContext(), Pred); // The loop is now broken, remove it from LI. RemoveLoopFromHierarchy(L); @@ -807,7 +773,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, return; } - DOUT << "Nuking dead block: " << *BB; + DEBUG(errs() << "Nuking dead block: " << *BB); // Remove the instructions in the basic block from the worklist. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -815,8 +781,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, // Anything that uses the instructions in this basic block should have their // uses replaced with undefs. - if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I->getType()->isVoidTy()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); } // If this is the edge to the header block for a loop, remove the loop and @@ -897,15 +865,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // selects, switches. std::vector<User*> Users(LIC->use_begin(), LIC->use_end()); std::vector<Instruction*> Worklist; + LLVMContext &Context = Val->getContext(); + // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. - if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) { + if (IsEqual || (isa<ConstantInt>(Val) && + Val->getType() == Type::getInt1Ty(Val->getContext()))) { Value *Replacement; if (IsEqual) Replacement = Val; else - Replacement = Context->getConstantInt(Type::Int1Ty, + Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), !cast<ConstantInt>(Val)->getZExtValue()); for (unsigned i = 0, e = Users.size(); i != e; ++i) @@ -937,27 +908,35 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // FIXME: This is a hack. We need to keep the successor around // and hooked up so as to preserve the loop structure, because // trying to update it is complicated. So instead we preserve the - // loop structure and put the block on an dead code path. - - BasicBlock *SISucc = SI->getSuccessor(i); - BasicBlock* Old = SI->getParent(); - BasicBlock* Split = SplitBlock(Old, SI, this); - - Instruction* OldTerm = Old->getTerminator(); - BranchInst::Create(Split, SISucc, - Context->getConstantIntTrue(), OldTerm); - - LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L); - Old->getTerminator()->eraseFromParent(); - - PHINode *PN; - for (BasicBlock::iterator II = SISucc->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - Value *InVal = PN->removeIncomingValue(Split, false); - PN->addIncoming(InVal, Old); - } - - SI->removeCase(i); + // loop structure and put the block on a dead code path. + BasicBlock *Switch = SI->getParent(); + SplitEdge(Switch, SI->getSuccessor(i), this); + // Compute the successors instead of relying on the return value + // of SplitEdge, since it may have split the switch successor + // after PHI nodes. + BasicBlock *NewSISucc = SI->getSuccessor(i); + BasicBlock *OldSISucc = *succ_begin(NewSISucc); + // Create an "unreachable" destination. + BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", + Switch->getParent(), + OldSISucc); + new UnreachableInst(Context, Abort); + // Force the new case destination to branch to the "unreachable" + // block while maintaining a (dead) CFG edge to the old block. + NewSISucc->getTerminator()->eraseFromParent(); + BranchInst::Create(Abort, OldSISucc, + ConstantInt::getTrue(Context), NewSISucc); + // Release the PHI operands for this edge. + for (BasicBlock::iterator II = NewSISucc->begin(); + PHINode *PN = dyn_cast<PHINode>(II); ++II) + PN->setIncomingValue(PN->getBasicBlockIndex(Switch), + UndefValue::get(PN->getType())); + // Tell the domtree about the new block. We don't fully update the + // domtree here -- instead we force it to do a full recomputation + // after the pass is complete -- but we do need to inform it of + // new blocks. + if (DT) + DT->addNewBlock(Abort, NewSISucc); break; } } @@ -971,7 +950,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, SimplifyCode(Worklist, L); } -/// SimplifyCode - Okay, now that we have simplified some instructions in the +/// SimplifyCode - Okay, now that we have simplified some instructions in the /// loop, walk over it and constant prop, dce, and fold control flow where /// possible. Note that this is effectively a very simple loop-structure-aware /// optimizer. During processing of this loop, L could very well be deleted, so @@ -986,14 +965,14 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { Worklist.pop_back(); // Simple constant folding. - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, I->getContext())) { ReplaceUsesOfWith(I, C, Worklist, L, LPM); continue; } // Simple DCE. if (isInstructionTriviallyDead(I)) { - DOUT << "Remove dead instruction '" << *I; + DEBUG(errs() << "Remove dead instruction '" << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1017,10 +996,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { break; case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && - I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS + // constant -> RHS + I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType() == Type::Int1Ty) { + if (CB->getType() == Type::getInt1Ty(I->getContext())) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1030,10 +1010,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { break; case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && - I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS + // constant -> RHS + I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType() == Type::Int1Ty) { + if (CB->getType() == Type::getInt1Ty(I->getContext())) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X @@ -1052,8 +1033,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { if (!SinglePred) continue; // Nothing to do. assert(SinglePred == Pred && "CFG broken"); - DOUT << "Merging blocks: " << Pred->getName() << " <- " - << Succ->getName() << "\n"; + DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- " + << Succ->getName() << "\n"); // Resolve any single entry PHI nodes in Succ. while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) @@ -1080,7 +1061,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // remove dead blocks. break; // FIXME: Enable. - DOUT << "Folded branch: " << *BI; + DEBUG(errs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); DeadSucc->removePredecessor(BI->getParent(), true); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3c7a5ab..c922814 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -24,29 +24,33 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include <list> using namespace llvm; STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); +STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); /// isBytewiseValue - If the specified value can be set by repeating the same /// byte in memory, return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. -static Value *isBytewiseValue(Value *V, LLVMContext* Context) { +static Value *isBytewiseValue(Value *V) { + LLVMContext &Context = V->getContext(); + // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType() == Type::Int8Ty) return V; + if (V->getType() == Type::getInt8Ty(Context)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { - if (CFP->getType() == Type::FloatTy) - V = Context->getConstantExprBitCast(CFP, Type::Int32Ty); - if (CFP->getType() == Type::DoubleTy) - V = Context->getConstantExprBitCast(CFP, Type::Int64Ty); + if (CFP->getType()->isFloatTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context)); + if (CFP->getType()->isDoubleTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context)); // Don't handle long double formats, which have strange constraints. } @@ -69,7 +73,7 @@ static Value *isBytewiseValue(Value *V, LLVMContext* Context) { if (Val != Val2) return 0; } - return Context->getConstantInt(Val); + return ConstantInt::get(Context, Val); } } @@ -271,6 +275,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) { if (Start < I->Start) { I->Start = Start; I->StartPtr = SI->getPointerOperand(); + I->Alignment = SI->getAlignment(); } // Now we know that Start <= I->End and Start >= I->Start (so the startpoint @@ -295,8 +300,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) { //===----------------------------------------------------------------------===// namespace { - - class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass { + class MemCpyOpt : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -309,16 +313,15 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); - AU.addRequired<TargetData>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); - AU.addPreserved<TargetData>(); } // Helper fuctions - bool processStore(StoreInst *SI, BasicBlock::iterator& BBI); - bool processMemCpy(MemCpyInst* M); - bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C); + bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); + bool processMemCpy(MemCpyInst *M); + bool processMemMove(MemMoveInst *M); + bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C); bool iterateOnFunction(Function &F); }; @@ -337,27 +340,31 @@ static RegisterPass<MemCpyOpt> X("memcpyopt", /// some other patterns to fold away. In particular, this looks for stores to /// neighboring locations of memory. If it sees enough consequtive ones /// (currently 4) it attempts to merge them together into a memcpy/memset. -bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { +bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (SI->isVolatile()) return false; + LLVMContext &Context = SI->getContext(); + // There are two cases that are interesting for this code to handle: memcpy // and memset. Right now we only handle memset. // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. - Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context); + Value *ByteVal = isBytewiseValue(SI->getOperand(0)); if (!ByteVal) return false; - TargetData &TD = getAnalysis<TargetData>(); + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + Module *M = SI->getParent()->getParent()->getParent(); // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. - MemsetRanges Ranges(TD); + MemsetRanges Ranges(*TD); Value *StartPtr = SI->getPointerOperand(); @@ -385,12 +392,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { if (NextStore->isVolatile()) break; // Check to see if this stored value is of the same byte-splattable value. - if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context)) + if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD)) + if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); @@ -405,7 +412,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. Ranges.addStore(0, SI); - Function *MemSetF = 0; @@ -419,7 +425,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. - if (!Range.isProfitableToUseMemset(TD)) + if (!Range.isProfitableToUseMemset(*TD)) continue; // Otherwise, we do want to transform this! Create a new memset. We put @@ -429,37 +435,38 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { BasicBlock::iterator InsertPt = BI; if (MemSetF == 0) { - const Type *Tys[] = {Type::Int64Ty}; - MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent() - ->getParent(), Intrinsic::memset, - Tys, 1); - } + const Type *Ty = Type::getInt64Ty(Context); + MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1); + } // Get the starting pointer of the block. StartPtr = Range.StartPtr; // Cast the start ptr to be i8* as memset requires. - const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty); + const Type *i8Ptr = Type::getInt8PtrTy(Context); if (StartPtr->getType() != i8Ptr) - StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), + StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), InsertPt); Value *Ops[] = { StartPtr, ByteVal, // Start, value - Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start), // size - Context->getConstantInt(Type::Int32Ty, Range.Alignment) // align + // size + ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), + // align + ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment) }; Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); - DEBUG(cerr << "Replace stores:\n"; + DEBUG(errs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - cerr << *Range.TheStores[i]; - cerr << "With: " << *C); C=C; + errs() << *Range.TheStores[i]; + errs() << "With: " << *C); C=C; // Don't invalidate the iterator BBI = BI; // Zap all the stores. - for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(), + for (SmallVector<StoreInst*, 16>::const_iterator + SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) (*SI)->eraseFromParent(); ++NumMemSetInfer; @@ -490,29 +497,30 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. - Value* cpyDest = cpy->getDest(); - Value* cpySrc = cpy->getSource(); + Value *cpyDest = cpy->getDest(); + Value *cpySrc = cpy->getSource(); CallSite CS = CallSite::get(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. - ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); + ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); if (!cpyLength) return false; // Require that src be an alloca. This simplifies the reasoning considerably. - AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc); + AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. - TargetData& TD = getAnalysis<TargetData>(); + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; - ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); + ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; - uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) * + uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); if (cpyLength->getZExtValue() < srcSize) @@ -521,25 +529,25 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. - if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) { + if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. - ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); + ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); if (!destArraySize) return false; - uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) * + uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * destArraySize->getZExtValue(); if (destSize < srcSize) return false; - } else if (Argument* A = dyn_cast<Argument>(cpyDest)) { + } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. if (!A->hasStructRetAttr()) return false; - const Type* StructTy = cast<PointerType>(A->getType())->getElementType(); - uint64_t destSize = TD.getTypeAllocSize(StructTy); + const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); + uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) return false; @@ -554,14 +562,14 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { - User* UI = srcUseList.back(); + User *UI = srcUseList.back(); srcUseList.pop_back(); if (isa<BitCastInst>(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); - } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) { + } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { if (G->hasAllZeroIndices()) for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) @@ -575,8 +583,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - DominatorTree& DT = getAnalysis<DominatorTree>(); - if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest)) + DominatorTree &DT = getAnalysis<DominatorTree>(); + if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) if (!DT.dominates(cpyDestInst, C)) return false; @@ -584,7 +592,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. - AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) != AliasAnalysis::NoModRef) return false; @@ -597,11 +605,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), cpyDest->getName(), C); changedArgument = true; - if (CS.getArgument(i)->getType() != cpyDest->getType()) - CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, - CS.getArgument(i)->getType(), cpyDest->getName(), C)); - else + if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); + else + CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, + CS.getArgument(i)->getType(), cpyDest->getName(), C)); } if (!changedArgument) @@ -609,7 +617,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. - MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); + MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); MD.removeInstruction(C); // Remove the memcpy @@ -624,22 +632,22 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be /// a memcpy from X to Z (or potentially a memmove, depending on circumstances). /// This allows later passes to remove the first memcpy altogether. -bool MemCpyOpt::processMemCpy(MemCpyInst* M) { - MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); +bool MemCpyOpt::processMemCpy(MemCpyInst *M) { + MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); // The are two possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE - // b) call-memcpy xform for return slot optimization + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. MemDepResult dep = MD.getDependency(M); if (!dep.isClobber()) return false; if (!isa<MemCpyInst>(dep.getInst())) { - if (CallInst* C = dyn_cast<CallInst>(dep.getInst())) + if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) return performCallSlotOptzn(M, C); return false; } - MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst()); + MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst()); // We can only transforms memcpy's where the dest of one is the source of the // other @@ -648,8 +656,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) { // Second, the length of the memcpy's must be the same, or the preceeding one // must be larger than the following one. - ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength()); - ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength()); + ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); + ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength()); if (!C1 || !C2) return false; @@ -661,7 +669,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) { // Finally, we have to make sure that the dest of the second does not // alias the source of the first - AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) != AliasAnalysis::NoAlias) return false; @@ -673,17 +681,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) { return false; // If all checks passed, then we can transform these memcpy's - const Type *Tys[1]; - Tys[0] = M->getLength()->getType(); - Function* MemCpyFun = Intrinsic::getDeclaration( + const Type *Ty = M->getLength()->getType(); + Function *MemCpyFun = Intrinsic::getDeclaration( M->getParent()->getParent()->getParent(), - M->getIntrinsicID(), Tys, 1); + M->getIntrinsicID(), &Ty, 1); Value *Args[4] = { M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst() }; - CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M); + CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M); // If C and M don't interfere, then this is a valid transformation. If they @@ -702,41 +709,78 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) { return false; } -// MemCpyOpt::runOnFunction - This is the main transformation entry point for a -// function. -// -bool MemCpyOpt::runOnFunction(Function& F) { +/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst +/// are guaranteed not to alias. +bool MemCpyOpt::processMemMove(MemMoveInst *M) { + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + // If the memmove is a constant size, use it for the alias query, this allows + // us to optimize things like: memmove(P, P+64, 64); + uint64_t MemMoveSize = ~0ULL; + if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength())) + MemMoveSize = Len->getZExtValue(); - bool changed = false; - bool shouldContinue = true; + // See if the pointers alias. + if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) != + AliasAnalysis::NoAlias) + return false; - while (shouldContinue) { - shouldContinue = iterateOnFunction(F); - changed |= shouldContinue; - } + DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); - return changed; -} + // If not, then we know we can transform this. + Module *Mod = M->getParent()->getParent()->getParent(); + const Type *Ty = M->getLength()->getType(); + M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1)); + // MemDep may have over conservative information about this instruction, just + // conservatively flush it from the cache. + getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M); -// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN + ++NumMoveToCpy; + return true; +} + + +// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN. bool MemCpyOpt::iterateOnFunction(Function &F) { - bool changed_function = false; + bool MadeChange = false; - // Walk all instruction in the function + // Walk all instruction in the function. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - // Avoid invalidating the iterator - Instruction* I = BI++; + // Avoid invalidating the iterator. + Instruction *I = BI++; if (StoreInst *SI = dyn_cast<StoreInst>(I)) - changed_function |= processStore(SI, BI); - else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) { - changed_function |= processMemCpy(M); + MadeChange |= processStore(SI, BI); + else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) + MadeChange |= processMemCpy(M); + else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { + if (processMemMove(M)) { + --BI; // Reprocess the new memcpy. + MadeChange = true; + } } } } - return changed_function; + return MadeChange; +} + +// MemCpyOpt::runOnFunction - This is the main transformation entry point for a +// function. +// +bool MemCpyOpt::runOnFunction(Function &F) { + bool MadeChange = false; + while (1) { + if (!iterateOnFunction(F)) + break; + MadeChange = true; + } + + return MadeChange; } + + + diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index fa60a9d..e6ffac2 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -31,9 +31,9 @@ #include "llvm/Pass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include <algorithm> @@ -46,7 +46,7 @@ STATISTIC(NumAnnihil, "Number of expr tree annihilated"); STATISTIC(NumFactor , "Number of multiplies factored"); namespace { - struct VISIBILITY_HIDDEN ValueEntry { + struct ValueEntry { unsigned Rank; Value *Op; ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} @@ -61,17 +61,17 @@ namespace { /// static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) { Module *M = I->getParent()->getParent()->getParent(); - cerr << Instruction::getOpcodeName(I->getOpcode()) << " " + errs() << Instruction::getOpcodeName(I->getOpcode()) << " " << *Ops[0].Op->getType(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M); - cerr << "," << Ops[i].Rank; + WriteAsOperand(errs() << " ", Ops[i].Op, false, M); + errs() << "," << Ops[i].Rank; } } #endif namespace { - class VISIBILITY_HIDDEN Reassociate : public FunctionPass { + class Reassociate : public FunctionPass { std::map<BasicBlock*, unsigned> RankMap; std::map<AssertingVH<>, unsigned> ValueRankMap; bool MadeChange; @@ -181,8 +181,8 @@ unsigned Reassociate::getRank(Value *V) { (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; - //DOUT << "Calculated Rank[" << V->getName() << "] = " - // << Rank << "\n"; + //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = " + // << Rank << "\n"); return CachedRank = Rank; } @@ -200,8 +200,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// static Instruction *LowerNegateToMultiply(Instruction *Neg, std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext* Context) { - Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType()); + LLVMContext &Context) { + Constant *Cst = Constant::getAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); ValueRankMap.erase(Neg); @@ -222,7 +222,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { isReassociableOp(RHS, I->getOpcode()) && "Not an expression that needs linearization?"); - DOUT << "Linear" << *LHS << *RHS << *I; + DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); // Move the RHS instruction to live immediately before I, avoiding breaking // dominator properties. @@ -235,7 +235,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { ++NumLinear; MadeChange = true; - DOUT << "Linearized: " << *I; + DEBUG(errs() << "Linearized: " << *I << '\n'); // If D is part of this expression tree, tail recurse. if (isReassociableOp(I->getOperand(1), I->getOpcode())) @@ -256,6 +256,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops) { Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); unsigned Opcode = I->getOpcode(); + LLVMContext &Context = I->getContext(); // First step, linearize the expression if it is in ((A+B)+(C+D)) form. BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); @@ -284,8 +285,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Ops.push_back(ValueEntry(getRank(RHS), RHS)); // Clear the leaves out. - I->setOperand(0, Context->getUndef(I->getType())); - I->setOperand(1, Context->getUndef(I->getType())); + I->setOperand(0, UndefValue::get(I->getType())); + I->setOperand(1, UndefValue::get(I->getType())); return; } else { // Turn X+(Y+Z) -> (Y+Z)+X @@ -320,7 +321,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Ops.push_back(ValueEntry(getRank(RHS), RHS)); // Clear the RHS leaf out. - I->setOperand(1, Context->getUndef(I->getType())); + I->setOperand(1, UndefValue::get(I->getType())); } // RewriteExprTree - Now that the operands for this expression tree are @@ -333,10 +334,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, if (I->getOperand(0) != Ops[i].Op || I->getOperand(1) != Ops[i+1].Op) { Value *OldLHS = I->getOperand(0); - DOUT << "RA: " << *I; + DEBUG(errs() << "RA: " << *I << '\n'); I->setOperand(0, Ops[i].Op); I->setOperand(1, Ops[i+1].Op); - DOUT << "TO: " << *I; + DEBUG(errs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -349,9 +350,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, assert(i+2 < Ops.size() && "Ops index out of range!"); if (I->getOperand(1) != Ops[i].Op) { - DOUT << "RA: " << *I; + DEBUG(errs() << "RA: " << *I << '\n'); I->setOperand(1, Ops[i].Op); - DOUT << "TO: " << *I; + DEBUG(errs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; } @@ -373,7 +374,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // version of the value is returned, and BI is left pointing at the instruction // that should be processed next by the reassociation pass. // -static Value *NegateValue(Value *V, Instruction *BI) { +static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -386,8 +387,8 @@ static Value *NegateValue(Value *V, Instruction *BI) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { // Push the negates through the add. - I->setOperand(0, NegateValue(I->getOperand(0), BI)); - I->setOperand(1, NegateValue(I->getOperand(1), BI)); + I->setOperand(0, NegateValue(Context, I->getOperand(0), BI)); + I->setOperand(1, NegateValue(Context, I->getOperand(1), BI)); // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are @@ -407,7 +408,7 @@ static Value *NegateValue(Value *V, Instruction *BI) { /// ShouldBreakUpSubtract - Return true if we should break up this subtract of /// X-Y into (X + -Y). -static bool ShouldBreakUpSubtract(Instruction *Sub) { +static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) { // If this is a negation, we can't split it up! if (BinaryOperator::isNeg(Sub)) return false; @@ -431,7 +432,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) { /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. -static Instruction *BreakUpSubtract(Instruction *Sub, +static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, std::map<AssertingVH<>, unsigned> &ValueRankMap) { // Convert a subtract into an add and a neg instruction... so that sub // instructions can be commuted with other add instructions... @@ -439,7 +440,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Calculate the negative value of Operand 1 of the sub instruction... // and set it as the RHS of the add instruction we just made... // - Value *NegVal = NegateValue(Sub->getOperand(1), Sub); + Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub); Instruction *New = BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); New->takeName(Sub); @@ -449,7 +450,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, Sub->replaceAllUsesWith(New); Sub->eraseFromParent(); - DOUT << "Negated: " << *New; + DEBUG(errs() << "Negated: " << *New << '\n'); return New; } @@ -458,16 +459,16 @@ static Instruction *BreakUpSubtract(Instruction *Sub, /// reassociation. static Instruction *ConvertShiftToMul(Instruction *Shl, std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext* Context) { + LLVMContext &Context) { // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || (Shl->hasOneUse() && (isReassociableOp(Shl->use_back(), Instruction::Mul) || isReassociableOp(Shl->use_back(), Instruction::Add)))) { - Constant *MulCst = Context->getConstantInt(Shl->getType(), 1); + Constant *MulCst = ConstantInt::get(Shl->getType(), 1); MulCst = - Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1))); + ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); @@ -567,7 +568,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op)) if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) { Ops.pop_back(); - Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2); + Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); return OptimizeExpression(I, Ops); } @@ -623,10 +624,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (FoundX != i) { if (Opcode == Instruction::And) { // ...&X&~X = 0 ++NumAnnihil; - return Context->getNullValue(X->getType()); + return Constant::getNullValue(X->getType()); } else if (Opcode == Instruction::Or) { // ...|X|~X = -1 ++NumAnnihil; - return Context->getConstantIntAllOnesValue(X->getType()); + return Constant::getAllOnesValue(X->getType()); } } } @@ -645,7 +646,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, assert(Opcode == Instruction::Xor); if (e == 2) { ++NumAnnihil; - return Context->getNullValue(Ops[0].Op->getType()); + return Constant::getNullValue(Ops[0].Op->getType()); } // ... X^X -> ... Ops.erase(Ops.begin()+i, Ops.begin()+i+2); @@ -670,7 +671,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, // Remove X and -X from the operand list. if (Ops.size() == 2) { ++NumAnnihil; - return Context->getNullValue(X->getType()); + return Constant::getNullValue(X->getType()); } else { Ops.erase(Ops.begin()+i); if (i < FoundX) @@ -727,7 +728,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, // If any factor occurred more than one time, we can pull it out. if (MaxOcc > 1) { - DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n"; + DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n"); // Create a new instruction that uses the MaxOccVal twice. If we don't do // this, we could otherwise run into situations where removing a factor @@ -781,6 +782,8 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, /// ReassociateBB - Inspect all of the instructions in this basic block, /// reassociating them as we go. void Reassociate::ReassociateBB(BasicBlock *BB) { + LLVMContext &Context = BB->getContext(); + for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { Instruction *BI = BBI++; if (BI->getOpcode() == Instruction::Shl && @@ -798,8 +801,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); + if (ShouldBreakUpSubtract(Context, BI)) { + BI = BreakUpSubtract(Context, BI, ValueRankMap); MadeChange = true; } else if (BinaryOperator::isNeg(BI)) { // Otherwise, this is a negation. See if the operand is a multiply tree @@ -838,7 +841,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { std::vector<ValueEntry> Ops; LinearizeExprTree(I, Ops); - DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n"; + DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n"); // Now that we have linearized the tree to a list and have gathered all of // the operands and their ranks, sort the operands by their rank. Use a @@ -853,7 +856,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { if (Value *V = OptimizeExpression(I, Ops)) { // This expression tree simplified to something that isn't a tree, // eliminate it. - DOUT << "Reassoc to scalar: " << *V << "\n"; + DEBUG(errs() << "Reassoc to scalar: " << *V << "\n"); I->replaceAllUsesWith(V); RemoveDeadBinaryOp(I); return; @@ -871,7 +874,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { Ops.pop_back(); } - DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n"; + DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n"); if (Ops.size() == 1) { // This expression tree simplified to something that isn't a tree, diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index ac95d25..99e1252 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -26,7 +26,6 @@ #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include <list> using namespace llvm; @@ -35,7 +34,7 @@ STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { - struct VISIBILITY_HIDDEN RegToMem : public FunctionPass { + struct RegToMem : public FunctionPass { static char ID; // Pass identification, replacement for typeid RegToMem() : FunctionPass(&ID) {} @@ -44,73 +43,17 @@ namespace { AU.addPreservedID(BreakCriticalEdgesID); } - bool valueEscapes(Instruction* i) { - BasicBlock* bb = i->getParent(); - for (Value::use_iterator ii = i->use_begin(), ie = i->use_end(); - ii != ie; ++ii) - if (cast<Instruction>(*ii)->getParent() != bb || - isa<PHINode>(*ii)) + bool valueEscapes(const Instruction *Inst) const { + const BasicBlock *BB = Inst->getParent(); + for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end(); + UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != BB || + isa<PHINode>(*UI)) return true; return false; } - virtual bool runOnFunction(Function &F) { - if (!F.isDeclaration()) { - // Insert all new allocas into entry block. - BasicBlock* BBEntry = &F.getEntryBlock(); - assert(pred_begin(BBEntry) == pred_end(BBEntry) && - "Entry block to function must not have predecessors!"); - - // Find first non-alloca instruction and create insertion point. This is - // safe if block is well-formed: it always have terminator, otherwise - // we'll get and assertion. - BasicBlock::iterator I = BBEntry->begin(); - while (isa<AllocaInst>(I)) ++I; - - CastInst *AllocaInsertionPoint = - CastInst::Create(Instruction::BitCast, - Context->getNullValue(Type::Int32Ty), Type::Int32Ty, - "reg2mem alloca point", I); - - // Find the escaped instructions. But don't create stack slots for - // allocas in entry block. - std::list<Instruction*> worklist; - for (Function::iterator ibb = F.begin(), ibe = F.end(); - ibb != ibe; ++ibb) - for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); - iib != iie; ++iib) { - if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) && - valueEscapes(iib)) { - worklist.push_front(&*iib); - } - } - - // Demote escaped instructions - NumRegsDemoted += worklist.size(); - for (std::list<Instruction*>::iterator ilb = worklist.begin(), - ile = worklist.end(); ilb != ile; ++ilb) - DemoteRegToStack(**ilb, false, AllocaInsertionPoint); - - worklist.clear(); - - // Find all phi's - for (Function::iterator ibb = F.begin(), ibe = F.end(); - ibb != ibe; ++ibb) - for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); - iib != iie; ++iib) - if (isa<PHINode>(iib)) - worklist.push_front(&*iib); - - // Demote phi nodes - NumPhisDemoted += worklist.size(); - for (std::list<Instruction*>::iterator ilb = worklist.begin(), - ile = worklist.end(); ilb != ile; ++ilb) - DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint); - - return true; - } - return false; - } + virtual bool runOnFunction(Function &F); }; } @@ -118,6 +61,66 @@ char RegToMem::ID = 0; static RegisterPass<RegToMem> X("reg2mem", "Demote all values to stack slots"); + +bool RegToMem::runOnFunction(Function &F) { + if (F.isDeclaration()) + return false; + + // Insert all new allocas into entry block. + BasicBlock *BBEntry = &F.getEntryBlock(); + assert(pred_begin(BBEntry) == pred_end(BBEntry) && + "Entry block to function must not have predecessors!"); + + // Find first non-alloca instruction and create insertion point. This is + // safe if block is well-formed: it always have terminator, otherwise + // we'll get and assertion. + BasicBlock::iterator I = BBEntry->begin(); + while (isa<AllocaInst>(I)) ++I; + + CastInst *AllocaInsertionPoint = + new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())), + Type::getInt32Ty(F.getContext()), + "reg2mem alloca point", I); + + // Find the escaped instructions. But don't create stack slots for + // allocas in entry block. + std::list<Instruction*> WorkList; + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) { + if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) && + valueEscapes(iib)) { + WorkList.push_front(&*iib); + } + } + + // Demote escaped instructions + NumRegsDemoted += WorkList.size(); + for (std::list<Instruction*>::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemoteRegToStack(**ilb, false, AllocaInsertionPoint); + + WorkList.clear(); + + // Find all phi's + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) + if (isa<PHINode>(iib)) + WorkList.push_front(&*iib); + + // Demote phi nodes + NumPhisDemoted += WorkList.size(); + for (std::list<Instruction*>::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint); + + return true; +} + + // createDemoteRegisterToMemory - Provide an entry point to create this pass. // const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index f0bc127..b5edf4e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -33,9 +33,10 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" @@ -58,7 +59,7 @@ namespace { /// LatticeVal class - This class represents the different lattice values that /// an LLVM value may occupy. It is a simple class with value semantics. /// -class VISIBILITY_HIDDEN LatticeVal { +class LatticeVal { enum { /// undefined - This LLVM Value has no known value yet. undefined, @@ -139,7 +140,7 @@ public: /// Constant Propagation. /// class SCCPSolver : public InstVisitor<SCCPSolver> { - LLVMContext* Context; + LLVMContext *Context; DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable std::map<Value*, LatticeVal> ValueState; // The state each value is in. @@ -179,12 +180,12 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - void setContext(LLVMContext* C) { Context = C; } + void setContext(LLVMContext *C) { Context = C; } /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB) { - DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n"; + DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n"); BBExecutable.insert(BB); // Basic block is executable! BBWorkList.push_back(BB); // Add the block to the work list! } @@ -260,14 +261,14 @@ private: // inline void markConstant(LatticeVal &IV, Value *V, Constant *C) { if (IV.markConstant(C)) { - DOUT << "markConstant: " << *C << ": " << *V; + DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); } } inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) { IV.markForcedConstant(C); - DOUT << "markForcedConstant: " << *C << ": " << *V; + DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); } @@ -280,11 +281,11 @@ private: // work list so that the users of the instruction are updated later. inline void markOverdefined(LatticeVal &IV, Value *V) { if (IV.markOverdefined()) { - DEBUG(DOUT << "markOverdefined: "; + DEBUG(errs() << "markOverdefined: "; if (Function *F = dyn_cast<Function>(V)) - DOUT << "Function '" << F->getName() << "'\n"; + errs() << "Function '" << F->getName() << "'\n"; else - DOUT << *V); + errs() << *V << '\n'); // Only instructions go on the work list OverdefinedInstWorkList.push_back(V); } @@ -337,8 +338,8 @@ private: return; // This edge is already known to be executable! if (BBExecutable.count(Dest)) { - DOUT << "Marking Edge Executable: " << Source->getNameStart() - << " -> " << Dest->getNameStart() << "\n"; + DEBUG(errs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); // The destination is already executable, but we just made an edge // feasible that wasn't before. Revisit the PHI nodes in the block @@ -399,7 +400,9 @@ private: void visitStoreInst (Instruction &I); void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); - void visitCallInst (CallInst &I) { visitCallSite(CallSite::get(&I)); } + void visitCallInst (CallInst &I) { + visitCallSite(CallSite::get(&I)); + } void visitInvokeInst (InvokeInst &II) { visitCallSite(CallSite::get(&II)); visitTerminatorInst(II); @@ -414,7 +417,7 @@ private: void visitInstruction(Instruction &I) { // If a new instruction is added to LLVM that we don't handle... - cerr << "SCCP: Don't know how to handle: " << I; + errs() << "SCCP: Don't know how to handle: " << I; markOverdefined(&I); // Just in case } }; @@ -440,7 +443,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs[0] = Succs[1] = true; } else if (BCValue.isConstant()) { // Constant condition variables mean the branch can only go a single way - Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true; + Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true; } } } else if (isa<InvokeInst>(&TI)) { @@ -455,7 +458,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, } else if (SCValue.isConstant()) Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true; } else { - assert(0 && "SCCP: Don't know how to handle this terminator!"); + llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } } @@ -485,7 +488,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Constant condition variables mean the branch can only go a single way return BI->getSuccessor(BCValue.getConstant() == - Context->getConstantIntFalse()) == To; + ConstantInt::getFalse(*Context)) == To; } return false; } @@ -513,8 +516,10 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { } return false; } else { - cerr << "Unknown terminator instruction: " << *TI; - abort(); +#ifndef NDEBUG + errs() << "Unknown terminator instruction: " << *TI << '\n'; +#endif + llvm_unreachable(0); } } @@ -642,7 +647,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator It = TrackedMultipleRetVals.find(std::make_pair(F, i)); if (It == TrackedMultipleRetVals.end()) break; - if (Value *Val = FindInsertedValue(I.getOperand(0), i)) + if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext())) mergeInValue(It->second, F, getValueState(Val)); } } @@ -666,7 +671,7 @@ void SCCPSolver::visitCastInst(CastInst &I) { if (VState.isOverdefined()) // Inherit overdefinedness of operand markOverdefined(&I); else if (VState.isConstant()) // Propagate constant value - markConstant(&I, Context->getConstantExprCast(I.getOpcode(), + markConstant(&I, ConstantExpr::getCast(I.getOpcode(), VState.getConstant(), I.getType())); } @@ -809,12 +814,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { if (NonOverdefVal->isUndefined()) { // Could annihilate value. if (I.getOpcode() == Instruction::And) - markConstant(IV, &I, Context->getNullValue(I.getType())); + markConstant(IV, &I, Constant::getNullValue(I.getType())); else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) - markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT)); + markConstant(IV, &I, Constant::getAllOnesValue(PT)); else markConstant(IV, &I, - Context->getConstantIntAllOnesValue(I.getType())); + Constant::getAllOnesValue(I.getType())); return; } else { if (I.getOpcode() == Instruction::And) { @@ -859,7 +864,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { Constant *V = - Context->getConstantExpr(I.getOpcode(), In1.getConstant(), + ConstantExpr::get(I.getOpcode(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) Result.markConstant(V); @@ -908,7 +913,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { markConstant(IV, &I, - Context->getConstantExpr(I.getOpcode(), V1State.getConstant(), + ConstantExpr::get(I.getOpcode(), V1State.getConstant(), V2State.getConstant())); } } @@ -945,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { Result.markOverdefined(); break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { - Constant *V = Context->getConstantExprCompare(I.getPredicate(), + Constant *V = ConstantExpr::getCompare(I.getPredicate(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) @@ -994,7 +999,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { - markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(), + markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), V1State.getConstant(), V2State.getConstant())); } @@ -1096,7 +1101,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; Operands.erase(Operands.begin()); // Erase the pointer from idx list... - markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0], + markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0], Operands.size())); } @@ -1127,10 +1132,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { if (PtrVal.isConstant() && !I.isVolatile()) { Value *Ptr = PtrVal.getConstant(); // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<ConstantPointerNull>(Ptr) && - cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { + if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) { // load null -> null - markConstant(IV, &I, Context->getNullValue(I.getType())); + markConstant(IV, &I, Constant::getNullValue(I.getType())); return; } @@ -1179,7 +1183,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { if (F == 0 || !F->hasLocalLinkage()) { CallOverdefined: // Void return and not tracking callee, just bail. - if (I->getType() == Type::VoidTy) return; + if (I->getType()->isVoidTy()) return; // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. @@ -1258,6 +1262,10 @@ CallOverdefined: for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++CAI) { LatticeVal &IV = ValueState[AI]; + if (AI->hasByValAttr() && !F->onlyReadsMemory()) { + IV.markOverdefined(); + continue; + } if (!IV.isOverdefined()) mergeInValue(IV, AI, getValueState(*CAI)); } @@ -1273,7 +1281,7 @@ void SCCPSolver::Solve() { Value *I = OverdefinedInstWorkList.back(); OverdefinedInstWorkList.pop_back(); - DOUT << "\nPopped off OI-WL: " << *I; + DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from // bottom to constant @@ -1291,7 +1299,7 @@ void SCCPSolver::Solve() { Value *I = InstWorkList.back(); InstWorkList.pop_back(); - DOUT << "\nPopped off I-WL: " << *I; + DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from // bottom to constant @@ -1311,7 +1319,7 @@ void SCCPSolver::Solve() { BasicBlock *BB = BBWorkList.back(); BBWorkList.pop_back(); - DOUT << "\nPopped off BBWL: " << *BB; + DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n'); // Notify all instructions in this basic block that they are newly // executable. @@ -1345,7 +1353,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // Look for instructions which produce undef values. - if (I->getType() == Type::VoidTy) continue; + if (I->getType()->isVoidTy()) continue; LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; @@ -1371,22 +1379,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // to be handled here, because we don't know whether the top part is 1's // or 0's. assert(Op0LV.isUndefined()); - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Or: // undef | X -> -1. X could be -1. if (const VectorType *PTy = dyn_cast<VectorType>(ITy)) markForcedConstant(LV, I, - Context->getConstantVectorAllOnesValue(PTy)); + Constant::getAllOnesValue(PTy)); else - markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy)); + markForcedConstant(LV, I, Constant::getAllOnesValue(ITy)); return true; case Instruction::SDiv: @@ -1399,7 +1407,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::AShr: @@ -1420,7 +1428,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X >> undef -> 0. X could be 0. // X << undef -> 0. X could be 0. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Select: // undef ? X : Y -> X or Y. There could be commonality between X/Y. @@ -1483,7 +1491,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // as undef, then further analysis could think the undef went another way // leading to an inconsistent set of conclusions. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - BI->setCondition(Context->getConstantIntFalse()); + BI->setCondition(ConstantInt::getFalse(*Context)); } else { SwitchInst *SI = cast<SwitchInst>(TI); SI->setCondition(SI->getCaseValue(1)); @@ -1502,7 +1510,7 @@ namespace { /// SCCP Class - This class uses the SCCPSolver to implement a per-function /// Sparse Conditional Constant Propagator. /// - struct VISIBILITY_HIDDEN SCCP : public FunctionPass { + struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(&ID) {} @@ -1531,9 +1539,9 @@ FunctionPass *llvm::createSCCPPass() { // and return true if the function was modified. // bool SCCP::runOnFunction(Function &F) { - DOUT << "SCCP on function '" << F.getNameStart() << "'\n"; + DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n"); SCCPSolver Solver; - Solver.setContext(Context); + Solver.setContext(&F.getContext()); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(F.begin()); @@ -1546,7 +1554,7 @@ bool SCCP::runOnFunction(Function &F) { bool ResolvedUndefs = true; while (ResolvedUndefs) { Solver.Solve(); - DOUT << "RESOLVING UNDEFs\n"; + DEBUG(errs() << "RESOLVING UNDEFs\n"); ResolvedUndefs = Solver.ResolvedUndefsIn(F); } @@ -1561,7 +1569,7 @@ bool SCCP::runOnFunction(Function &F) { for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (!Solver.isBlockExecutable(BB)) { - DOUT << " BasicBlock Dead:" << *BB; + DEBUG(errs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1573,7 +1581,7 @@ bool SCCP::runOnFunction(Function &F) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + I->replaceAllUsesWith(UndefValue::get(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++NumInstRemoved; @@ -1584,8 +1592,7 @@ bool SCCP::runOnFunction(Function &F) { // for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::VoidTy || - isa<TerminatorInst>(Inst)) + if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst)) continue; LatticeVal &IV = Values[Inst]; @@ -1593,8 +1600,8 @@ bool SCCP::runOnFunction(Function &F) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : Context->getUndef(Inst->getType()); - DOUT << " Constant: " << *Const << " = " << *Inst; + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the constant. Inst->replaceAllUsesWith(Const); @@ -1617,7 +1624,7 @@ namespace { /// IPSCCP Class - This class implements interprocedural Sparse Conditional /// Constant Propagation. /// - struct VISIBILITY_HIDDEN IPSCCP : public ModulePass { + struct IPSCCP : public ModulePass { static char ID; IPSCCP() : ModulePass(&ID) {} bool runOnModule(Module &M); @@ -1658,7 +1665,10 @@ static bool AddressIsTaken(GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { + LLVMContext *Context = &M.getContext(); + SCCPSolver Solver; + Solver.setContext(Context); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. @@ -1687,7 +1697,7 @@ bool IPSCCP::runOnModule(Module &M) { while (ResolvedUndefs) { Solver.Solve(); - DOUT << "RESOLVING UNDEFS\n"; + DEBUG(errs() << "RESOLVING UNDEFS\n"); ResolvedUndefs = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) ResolvedUndefs |= Solver.ResolvedUndefsIn(*F); @@ -1709,8 +1719,8 @@ bool IPSCCP::runOnModule(Module &M) { LatticeVal &IV = Values[AI]; if (IV.isConstant() || IV.isUndefined()) { Constant *CST = IV.isConstant() ? - IV.getConstant() : Context->getUndef(AI->getType()); - DOUT << "*** Arg " << *AI << " = " << *CST <<"\n"; + IV.getConstant() : UndefValue::get(AI->getType()); + DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n"); // Replaces all of the uses of a variable with uses of the // constant. @@ -1721,7 +1731,7 @@ bool IPSCCP::runOnModule(Module &M) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (!Solver.isBlockExecutable(BB)) { - DOUT << " BasicBlock Dead:" << *BB; + DEBUG(errs() << " BasicBlock Dead:" << *BB); ++IPNumDeadBlocks; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1734,7 +1744,7 @@ bool IPSCCP::runOnModule(Module &M) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + I->replaceAllUsesWith(UndefValue::get(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++IPNumInstRemoved; @@ -1746,18 +1756,18 @@ bool IPSCCP::runOnModule(Module &M) { TI->getSuccessor(i)->removePredecessor(BB); } if (!TI->use_empty()) - TI->replaceAllUsesWith(Context->getUndef(TI->getType())); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); BB->getInstList().erase(TI); if (&*BB != &F->front()) BlocksToErase.push_back(BB); else - new UnreachableInst(BB); + new UnreachableInst(M.getContext(), BB); } else { for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::VoidTy) + if (Inst->getType()->isVoidTy()) continue; LatticeVal &IV = Values[Inst]; @@ -1765,8 +1775,8 @@ bool IPSCCP::runOnModule(Module &M) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : Context->getUndef(Inst->getType()); - DOUT << " Constant: " << *Const << " = " << *Inst; + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the // constant. @@ -1802,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) { } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold"); } else { - assert(0 && "Didn't fold away reference to block!"); + llvm_unreachable("Didn't fold away reference to block!"); } #endif @@ -1834,12 +1844,12 @@ bool IPSCCP::runOnModule(Module &M) { for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(), E = RV.end(); I != E; ++I) if (!I->second.isOverdefined() && - I->first->getReturnType() != Type::VoidTy) { + !I->first->getReturnType()->isVoidTy()) { Function *F = I->first; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) - RI->setOperand(0, Context->getUndef(F->getReturnType())); + RI->setOperand(0, UndefValue::get(F->getReturnType())); } // If we infered constant or undef values for globals variables, we can delete @@ -1850,7 +1860,7 @@ bool IPSCCP::runOnModule(Module &M) { GlobalVariable *GV = I->first; assert(!I->second.isOverdefined() && "Overdefined values should have been taken out of the map!"); - DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n"; + DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n"); while (!GV->use_empty()) { StoreInst *SI = cast<StoreInst>(GV->use_back()); SI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 109fb90..610d874 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -34,13 +34,13 @@ #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); @@ -49,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { - struct VISIBILITY_HIDDEN SROA : public FunctionPass { + struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid explicit SROA(signed T = -1) : FunctionPass(&ID) { if (T == -1) @@ -68,7 +68,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<DominanceFrontier>(); - AU.addRequired<TargetData>(); AU.setPreservesCFG(); } @@ -150,9 +149,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { bool SROA::runOnFunction(Function &F) { - TD = &getAnalysis<TargetData>(); - + TD = getAnalysisIfAvailable<TargetData>(); + bool Changed = performPromotion(F); + + // FIXME: ScalarRepl currently depends on TargetData more than it + // theoretically needs to. It should be refactored in order to support + // target-independent IR. Until this is done, just skip the actual + // scalar-replacement portion of this pass. + if (!TD) return Changed; + while (1) { bool LocalChange = performScalarRepl(F); if (!LocalChange) break; // No need to repromote if no scalarrepl @@ -186,7 +192,7 @@ bool SROA::performPromotion(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + PromoteMemToReg(Allocas, DT, DF, F.getContext()); NumPromoted += Allocas.size(); Changed = true; } @@ -238,11 +244,10 @@ bool SROA::performScalarRepl(Function &F) { // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { - DOUT << "Found alloca equal to global: " << *AI; - DOUT << " memcpy = " << *TheCopy; + DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(errs() << " memcpy = " << *TheCopy << '\n'); Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); - AI->replaceAllUsesWith( - Context->getConstantExprBitCast(TheSrc, AI->getType())); + AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; @@ -256,9 +261,12 @@ bool SROA::performScalarRepl(Function &F) { // value cannot be decomposed at all. uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + // Do not promote [0 x %struct]. + if (AllocaSize == 0) continue; + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - + if ((isa<StructType>(AI->getAllocatedType()) || isa<ArrayType>(AI->getAllocatedType())) && // Do not promote any struct into more than "32" separate vars. @@ -266,7 +274,7 @@ bool SROA::performScalarRepl(Function &F) { // Check that all of the users of the allocation are capable of being // transformed. switch (isSafeAllocaToScalarRepl(AI)) { - default: assert(0 && "Unexpected value!"); + default: llvm_unreachable("Unexpected value!"); case 0: // Not safe to scalar replace. break; case 1: // Safe, but requires cleanup/canonicalizations first @@ -298,16 +306,17 @@ bool SROA::performScalarRepl(Function &F) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) { - DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; + DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); + NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } else { - DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; + DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - const Type *NewTy = Context->getIntegerType(AllocaSize*8); + const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } @@ -328,14 +337,14 @@ bool SROA::performScalarRepl(Function &F) { /// predicate, do SROA now. void SROA::DoScalarReplacement(AllocationInst *AI, std::vector<AllocationInst*> &WorkList) { - DOUT << "Found inst to SROA: " << *AI; + DEBUG(errs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -345,7 +354,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, const Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -371,7 +380,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - Value *Insert = Context->getUndef(LI->getType()); + Value *Insert = UndefValue::get(LI->getType()); for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { Value *Load = new LoadInst(ElementAllocas[i], "load", LI); Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); @@ -418,7 +427,8 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // expanded itself once the worklist is rerun. // SmallVector<Value*, 8> NewArgs; - NewArgs.push_back(Context->getNullValue(Type::Int32Ty)); + NewArgs.push_back(Constant::getNullValue( + Type::getInt32Ty(AI->getContext()))); NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), NewArgs.end(), "", GEPI); @@ -478,7 +488,7 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, if (Info.isUnsafe) return; break; } - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); case Instruction::Call: if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { @@ -488,10 +498,10 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, break; } } - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); default: - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); } } @@ -531,7 +541,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". if (I == E || - I.getOperand() != Context->getNullValue(I.getOperand()->getType())) { + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); } @@ -727,6 +737,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. Value *OtherPtr = 0; + LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy if (BCInst == MTI->getRawDest()) @@ -764,7 +775,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == BCInst; - Constant *Zero = Context->getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. @@ -772,9 +783,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { - Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) }; + Value *Idx[2] = { Zero, + ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+utostr(i), + OtherPtr->getNameStr()+"."+Twine(i), MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); @@ -819,7 +831,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Constant *StoreVal; if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { if (CI->isZero()) { - StoreVal = Context->getNullValue(EltTy); // 0.0, null, 0, <0,0> + StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. const Type *ValTy = EltTy->getScalarType(); @@ -835,18 +847,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Convert the integer value to the appropriate type. - StoreVal = Context->getConstantInt(TotalVal); + StoreVal = ConstantInt::get(Context, TotalVal); if (isa<PointerType>(ValTy)) - StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy); + StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); else if (ValTy->isFloatingPoint()) - StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy); + StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. if (EltTy != ValTy) { unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); - StoreVal = Context->getConstantVector(&Elts[0], NumElts); + StoreVal = ConstantVector::get(&Elts[0], NumElts); } } new StoreInst(StoreVal, EltPtr, MI); @@ -872,15 +884,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size - Context->getConstantInt(Type::Int32Ty, OtherEltAlign) // Align + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + // Align + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } else { assert(isa<MemSetInst>(MI)); Value *Ops[] = { EltPtr, MI->getOperand(2), // Dest, Value, - Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size Zero // Align }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); @@ -910,9 +923,11 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, - Context->getIntegerType(AllocaSizeBits), "", SI); + IntegerType::get(SI->getContext(), AllocaSizeBits), + "", SI); - DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -929,7 +944,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } @@ -942,7 +957,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, if (FieldSizeBits != AllocaSizeBits) EltVal = new TruncInst(EltVal, - Context->getIntegerType(FieldSizeBits), "", SI); + IntegerType::get(SI->getContext(), FieldSizeBits), + "", SI); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. @@ -952,7 +968,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - Context->getPointerTypeUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -977,7 +993,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } @@ -985,7 +1001,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) EltVal = new TruncInst(EltVal, - Context->getIntegerType(ElementSizeBits),"",SI); + IntegerType::get(SI->getContext(), + ElementSizeBits),"",SI); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. @@ -995,7 +1012,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - Context->getPointerTypeUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -1026,7 +1043,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) return; - DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -1038,9 +1056,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } - - Value *ResultVal = - Context->getNullValue(Context->getIntegerType(AllocaSizeBits)); + + Value *ResultVal = + Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast @@ -1053,11 +1071,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits); + const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + FieldSizeBits); if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && !isa<VectorType>(FieldTy)) SrcField = new BitCastInst(SrcField, - Context->getPointerTypeUnqual(FieldIntTy), + PointerType::getUnqual(FieldIntTy), "", LI); SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1082,7 +1101,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); if (Shift) { - Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift); + Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } @@ -1152,7 +1171,8 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { I != E; ++I) { isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info); if (Info.isUnsafe) { - DOUT << "Cannot transform: " << *AI << " due to user: " << **I; + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " + << **I << '\n'); return 0; } } @@ -1186,24 +1206,25 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { return; if (NumElements == 1) { - GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty)); + GEPI->setOperand(2, + Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()))); return; } assert(NumElements == 2 && "Unhandled case!"); // All users of the GEP must be loads. At each use of the GEP, insert // two loads of the appropriate indexed GEP and select between them. - Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), - Context->getNullValue(I.getOperand()->getType()), - "isone", GEPI); + Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(), + Constant::getNullValue(I.getOperand()->getType()), + "isone"); // Insert the new GEP instructions, which are properly indexed. SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Context->getNullValue(Type::Int32Ty); + Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), GEPI->getName()+".0", GEPI); - Indices[1] = Context->getConstantInt(Type::Int32Ty, 1); + Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), @@ -1261,9 +1282,9 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) { /// and stores would mutate the memory. static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, unsigned AllocaSize, const TargetData &TD, - LLVMContext* Context) { + LLVMContext &Context) { // If this could be contributing to a vector, analyze it. - if (VecTy != Type::VoidTy) { // either null or a vector type. + if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. @@ -1276,7 +1297,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, VecTy = VInTy; return; } - } else if (In == Type::FloatTy || In == Type::DoubleTy || + } else if (In->isFloatTy() || In->isDoubleTy() || (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see @@ -1289,7 +1310,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, cast<VectorType>(VecTy)->getElementType() ->getPrimitiveSizeInBits()/8 == EltSize)) { if (VecTy == 0) - VecTy = Context->getVectorType(In, AllocaSize/EltSize); + VecTy = VectorType::get(In, AllocaSize/EltSize); return; } } @@ -1297,7 +1318,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. - VecTy = Type::VoidTy; + VecTy = Type::getVoidTy(Context); } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -1320,7 +1341,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Don't break volatile loads. if (LI->isVolatile()) return false; - MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context); + MergeInType(LI->getType(), Offset, VecTy, + AllocaSize, *TD, V->getContext()); SawVec |= isa<VectorType>(LI->getType()); continue; } @@ -1329,7 +1351,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return 0; MergeInType(SI->getOperand(0)->getType(), Offset, - VecTy, AllocaSize, *TD, Context); + VecTy, AllocaSize, *TD, V->getContext()); SawVec |= isa<VectorType>(SI->getOperand(0)->getType()); continue; } @@ -1433,7 +1455,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); + // FIXME: Remove once builder has Twine API. + Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); @@ -1457,8 +1480,10 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { for (unsigned i = 1; i != NumBytes; ++i) APVal |= APVal << 8; - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); - Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal), + // FIXME: Remove once builder has Twine API. + Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); Builder.CreateStore(New, NewAI); } @@ -1510,8 +1535,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { continue; } - assert(0 && "Unsupported operation!"); - abort(); + llvm_unreachable("Unsupported operation!"); } } @@ -1545,9 +1569,8 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, - Context->getConstantInt(Type::Int32Ty,Elt), - "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType, "tmp"); return V; @@ -1557,7 +1580,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // use insertvalue's to form the FCA. if (const StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD->getStructLayout(ST); - Value *Res = Context->getUndef(ST); + Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), @@ -1569,7 +1592,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = Context->getUndef(AT); + Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); @@ -1599,21 +1622,23 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - Context->getConstantInt(FromVal->getType(), + ConstantInt::get(FromVal->getType(), ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateShl(FromVal, - Context->getConstantInt(FromVal->getType(), + ConstantInt::get(FromVal->getType(), -ShAmt), "tmp"); // Finally, unconditionally truncate the integer to the right width. unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = - Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); else if (LIBitWidth > NTy->getBitWidth()) FromVal = - Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. if (isa<IntegerType>(ToType)) { @@ -1645,6 +1670,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); @@ -1664,7 +1690,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); SV = Builder.CreateInsertElement(Old, SV, - Context->getConstantInt(Type::Int32Ty, Elt), + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); return SV; } @@ -1697,9 +1723,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) - SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); else if (isa<PointerType>(SV->getType())) - SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp"); + SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { @@ -1732,11 +1759,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(), + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp"); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(), + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp"); Mask = Mask.lshr(-ShAmt); } @@ -1745,7 +1772,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // in the new bits. if (SrcWidth != DestWidth) { assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask"); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); SV = Builder.CreateOr(Old, SV, "ins"); } return SV; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b8bce80..29712b3 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -30,7 +30,6 @@ #include "llvm/Module.h" #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Pass.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -40,7 +39,7 @@ using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { - struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass { + struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid CFGSimplifyPass() : FunctionPass(&ID) {} @@ -58,20 +57,20 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) { +static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); - new UnreachableInst(I); + new UnreachableInst(I->getContext(), I); // All instructions after this are dead. BasicBlock::iterator BBI = I, BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(Context->getUndef(BBI->getType())); + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); BB->getInstList().erase(BBI++); } } @@ -97,7 +96,7 @@ static void ChangeToCall(InvokeInst *II) { static bool MarkAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable, - LLVMContext* Context) { + LLVMContext &Context) { SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); @@ -132,7 +131,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && - cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) { + SI->getPointerAddressSpace() == 0)) { ChangeToUnreachable(SI, Context); Changed = true; break; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 4aad17d..13077fe 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -22,15 +22,13 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Config/config.h" using namespace llvm; namespace { /// This pass optimizes well half_powr function calls. /// - class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass { + class SimplifyHalfPowrLibCalls : public FunctionPass { const TargetData *TD; public: static char ID; // Pass identification @@ -39,7 +37,6 @@ namespace { bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } Instruction * @@ -60,8 +57,9 @@ FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { /// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging /// their control flow to better facilitate subsequent optimization. Instruction * -SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt) { +SimplifyHalfPowrLibCalls:: +InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, + Instruction *InsertPt) { std::vector<BasicBlock *> Bodies; BasicBlock *NewBlock = 0; @@ -123,7 +121,7 @@ SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &Half /// runOnFunction - Top level algorithm. /// bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = &getAnalysis<TargetData>(); + TD = getAnalysisIfAvailable<TargetData>(); bool Changed = false; std::vector<Instruction *> HalfPowrs; @@ -136,8 +134,7 @@ bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { Function *Callee = CI->getCalledFunction(); if (Callee && Callee->hasExternalLinkage()) { // Look for calls with well-known names. - const char *CalleeName = Callee->getNameStart(); - if (strcmp(CalleeName, "__half_powrf4") == 0) + if (Callee->getName() == "__half_powrf4") IsHalfPowr = true; } } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index ec48469..e1866015 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -9,11 +9,9 @@ // // This file implements a simple pass that applies a variety of small // optimizations for calls to specific well-known function calls (e.g. runtime -// library functions). For example, a call to the function "exit(3)" that -// occurs within the main() function can be transformed into a simple "return 3" -// instruction. Any optimization that takes this form (replace call to library -// function with simpler code that provides the same result) belongs in this -// file. +// library functions). Any optimization that takes the very simple form +// "replace call to library function with simpler code that provides the same +// result" belongs in this file. // //===----------------------------------------------------------------------===// @@ -29,8 +27,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Config/config.h" using namespace llvm; @@ -44,7 +43,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions"); /// This class is the abstract base class for the set of optimizations that /// corresponds to one library call. namespace { -class VISIBILITY_HIDDEN LibCallOptimization { +class LibCallOptimization { protected: Function *Caller; const TargetData *TD; @@ -58,14 +57,14 @@ public: /// performed. If it returns CI, then it transformed the call and CI is to be /// deleted. If it returns something else, replace CI with the new value and /// delete CI. - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - - Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) { + + Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); - this->TD = &TD; + this->TD = TD; if (CI->getCalledFunction()) - Context = CI->getCalledFunction()->getContext(); + Context = &CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } @@ -76,12 +75,12 @@ public: /// specified pointer. Ptr is required to be some pointer type, and the /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, + Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B); - + /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); @@ -96,35 +95,36 @@ public: /// 'floor'). This function is known to take a single of type matching 'Op' /// and returns one value with the same type. If 'Op' is a long double, 'l' /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B); - + Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, + const AttrListPtr &Attrs); + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void EmitPutChar(Value *Char, IRBuilder<> &B); - + /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. void EmitPutS(Value *Str, IRBuilder<> &B); - + /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an i32, and File is a pointer to FILE. void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - + /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - + /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - + }; } // End anonymous namespace. /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { return - B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr"); + B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -137,8 +137,8 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Attribute::NoUnwind); Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), - TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), + TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -157,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, Tys[0] = Len->getType(); Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, - Context->getConstantInt(Type::Int32Ty, Align)); + ConstantInt::get(Type::getInt32Ty(*Context), Align)); } /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is @@ -169,9 +169,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Context->getPointerTypeUnqual(Type::Int8Ty), - Context->getPointerTypeUnqual(Type::Int8Ty), - Type::Int32Ty, TD->getIntPtrType(), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -192,10 +193,10 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, Attribute::NoUnwind); Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), - Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), NULL); + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -213,7 +214,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, const Type *Tys[1]; Tys[0] = Len->getType(); Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = Context->getConstantInt(Type::Int32Ty, 1); + Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); } @@ -222,14 +223,15 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B) { + IRBuilder<> &B, + const AttrListPtr &Attrs) { char NameBuffer[20]; - if (Op->getType() != Type::DoubleTy) { + if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. unsigned NameLen = strlen(Name); assert(NameLen < sizeof(NameBuffer)-2); memcpy(NameBuffer, Name, NameLen); - if (Op->getType() == Type::FloatTy) + if (Op->getType()->isFloatTy()) NameBuffer[NameLen] = 'f'; // floorf else NameBuffer[NameLen] = 'l'; // floorl @@ -241,7 +243,7 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), NULL); CallInst *CI = B.CreateCall(Callee, Op, Name); - + CI->setAttributes(Attrs); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -252,10 +254,12 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, /// is an integer. void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); - Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, - Type::Int32Ty, NULL); + Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, Type::Int32Ty, "chari"), + B.CreateIntCast(Char, + Type::getInt32Ty(*Context), + "chari"), "putchar"); if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) @@ -271,8 +275,8 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -289,12 +293,16 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, - Type::Int32Ty, File->getType(), NULL); + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), + NULL); else - F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, + F = M->getOrInsertFunction("fputc", + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); + Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"); CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) @@ -311,12 +319,13 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fputs", Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -336,17 +345,19 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, Constant *F; if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), - TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), TD->getIntPtrType(), + TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), TD->getIntPtrType(), + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - Context->getConstantInt(TD->getIntPtrType(), 1), File); + ConstantInt::get(TD->getIntPtrType(*Context), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -362,30 +373,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { // Look through noop bitcast instructions. if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) return GetStringLengthH(BCI->getOperand(0), PHIs); - + // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. if (PHINode *PN = dyn_cast<PHINode>(V)) { if (!PHIs.insert(PN)) return ~0ULL; // already in the set. - + // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); if (Len == 0) return 0; // Unknown length -> unknown. - + if (Len == ~0ULL) continue; - + if (Len != LenSoFar && LenSoFar != ~0ULL) return 0; // Disagree -> unknown. LenSoFar = Len; } - + // Success, all agree. return LenSoFar; } - + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (SelectInst *SI = dyn_cast<SelectInst>(V)) { uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); @@ -397,7 +408,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { if (Len1 != Len2) return 0; return Len1; } - + // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return unknown. User *GEP = 0; @@ -410,11 +421,11 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { } else { return 0; } - + // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return 0; - + // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { @@ -422,7 +433,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { return 0; } else return 0; - + // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. @@ -431,28 +442,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { StartIdx = CI->getZExtValue(); else return 0; - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer()) + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GV->mayBeOverridden()) return 0; Constant *GlobalInit = GV->getInitializer(); - + // Handle the ConstantAggregateZero case, which is a degenerate case. The // initializer is constant zero so the length of the string must be zero. if (isa<ConstantAggregateZero>(GlobalInit)) return 1; // Len = 0 offset by 1. - + // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || Array->getType()->getElementType() != Type::Int8Ty) + if (!Array || + Array->getType()->getElementType() != Type::getInt8Ty(V->getContext())) return false; - + // Get the number of elements in the array uint64_t NumElts = Array->getType()->getNumElements(); - + // Traverse the constant array from StartIdx (derived above) which is // the place the GEP refers to in the array. for (unsigned i = StartIdx; i != NumElts; ++i) { @@ -463,7 +476,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { if (CI->isZero()) return i-StartIdx+1; // We found end of string, success! } - + return 0; // The array isn't null terminated, conservatively return 'unknown'. } @@ -471,7 +484,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLength(Value *V) { if (!isa<PointerType>(V->getType())) return 0; - + SmallPtrSet<PHINode*, 32> PHIs; uint64_t Len = GetStringLengthH(V, PHIs); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return @@ -480,7 +493,7 @@ static uint64_t GetStringLength(Value *V) { } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. +/// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { @@ -496,73 +509,38 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } //===----------------------------------------------------------------------===// -// Miscellaneous LibCall Optimizations -//===----------------------------------------------------------------------===// - -namespace { -//===---------------------------------------===// -// 'exit' Optimizations - -/// ExitOpt - int main() { exit(4); } --> int main() { return 4; } -struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify we have a reasonable prototype for exit. - if (Callee->arg_size() == 0 || !CI->use_empty()) - return 0; - - // Verify the caller is main, and that the result type of main matches the - // argument type of exit. - if (!Caller->isName("main") || !Caller->hasExternalLinkage() || - Caller->getReturnType() != CI->getOperand(1)->getType()) - return 0; - - TerminatorInst *OldTI = CI->getParent()->getTerminator(); - - // Create the return after the call. - ReturnInst *RI = B.CreateRet(CI->getOperand(1)); - - // Drop all successor phi node entries. - for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i) - OldTI->getSuccessor(i)->removePredecessor(CI->getParent()); - - // Erase all instructions from after our return instruction until the end of - // the block. - BasicBlock::iterator FirstDead = RI; ++FirstDead; - CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end()); - return CI; - } -}; - -//===----------------------------------------------------------------------===// // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// //===---------------------------------------===// // 'strcat' Optimizations - -struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { +namespace { +struct StrCatOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) return 0; - + // Extract some information from the instruction Value *Dst = CI->getOperand(1); Value *Src = CI->getOperand(2); - + // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) return 0; --Len; // Unbias length. - + // Handle the simple, do-nothing case: strcat(x, "") -> x if (Len == 0) return Dst; - + + // These optimizations require TargetData. + if (!TD) return 0; + EmitStrLenMemCpy(Src, Dst, Len, B); return Dst; } @@ -571,28 +549,28 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. Value *DstLen = EmitStrLen(Dst, B); - + // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of // the string .. we're concatenating). Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); - + // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B); } }; //===---------------------------------------===// // 'strncat' Optimizations -struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt { +struct StrNCatOpt : public StrCatOpt { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !isa<IntegerType>(FT->getParamType(2))) @@ -619,6 +597,9 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt { // strncat(x, c, 0) -> x if (SrcLen == 0 || Len == 0) return Dst; + // These optimizations require TargetData. + if (!TD) return 0; + // We don't optimize this case if (Len < SrcLen) return 0; @@ -632,27 +613,31 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt { //===---------------------------------------===// // 'strchr' Optimizations -struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { +struct StrChrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strchr" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType()) return 0; - + Value *SrcStr = CI->getOperand(1); - + // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2)); if (CharC == 0) { + // These optimizations require TargetData. + if (!TD) return 0; + uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32. + if (Len == 0 || + FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32. return 0; - + return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. - Context->getConstantInt(TD->getIntPtrType(), Len), B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), B); } // Otherwise, the character is a constant, see if the first argument is @@ -660,24 +645,24 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { std::string Str; if (!GetConstantStringInfo(SrcStr, Str)) return 0; - + // strchr can find the nul character. Str += '\0'; char CharValue = CharC->getSExtValue(); - + // Compute the offset. uint64_t i = 0; while (1) { if (i == Str.size()) // Didn't find the char. strchr returns null. - return Context->getNullValue(CI->getType()); + return Constant::getNullValue(CI->getType()); // Did we find our match? if (Str[i] == CharValue) break; ++i; } - + // strchr(s+n,c) -> gep(s+n+i,c) - Value *Idx = Context->getConstantInt(Type::Int64Ty, i); + Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i); return B.CreateGEP(SrcStr, Idx, "strchr"); } }; @@ -685,40 +670,44 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { //===---------------------------------------===// // 'strcmp' Optimizations -struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { +struct StrCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty || + if (FT->getNumParams() != 2 || + FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) + FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - + Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strcmp(x,x) -> 0 - return Context->getConstantInt(CI->getType(), 0); - + return ConstantInt::get(CI->getType(), 0); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - + if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - + // strcmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) - return Context->getConstantInt(CI->getType(), + return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str())); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { + // These optimizations require TargetData. + if (!TD) return 0; + return EmitMemCmp(Str1P, Str2P, - Context->getConstantInt(TD->getIntPtrType(), + ConstantInt::get(TD->getIntPtrType(*Context), std::min(Len1, Len2)), B); } @@ -729,43 +718,44 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { //===---------------------------------------===// // 'strncmp' Optimizations -struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { +struct StrNCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty || + if (FT->getNumParams() != 3 || + FT->getReturnType() != Type::getInt32Ty(*Context) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getParamType(2))) return 0; - + Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 - return Context->getConstantInt(CI->getType(), 0); - + return ConstantInt::get(CI->getType(), 0); + // Get the length argument if it is constant. uint64_t Length; if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) Length = LengthArg->getZExtValue(); else return 0; - + if (Length == 0) // strncmp(x,y,0) -> 0 - return Context->getConstantInt(CI->getType(), 0); - + return ConstantInt::get(CI->getType(), 0); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - + // strncmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) - return Context->getConstantInt(CI->getType(), + return ConstantInt::get(CI->getType(), strncmp(Str1.c_str(), Str2.c_str(), Length)); return 0; } @@ -775,27 +765,30 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { //===---------------------------------------===// // 'strcpy' Optimizations -struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization { +struct StrCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcpy" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) + FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - + Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); if (Dst == Src) // strcpy(x,x) -> x return Src; - + + // These optimizations require TargetData. + if (!TD) return 0; + // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) return 0; - + // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(Dst, Src, - Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); return Dst; } }; @@ -803,12 +796,12 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization { //===---------------------------------------===// // 'strncpy' Optimizations -struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { +struct StrNCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getParamType(2))) return 0; @@ -823,7 +816,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B); + EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, + B); return Dst; } @@ -835,12 +829,15 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + // These optimizations require TargetData. + if (!TD) return 0; + // Let strncpy handle the zero padding if (Len > SrcLen+1) return 0; // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); return Dst; } @@ -849,19 +846,19 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { //===---------------------------------------===// // 'strlen' Optimizations -struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization { +struct StrLenOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || - FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || + FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getReturnType())) return 0; - + Value *Src = CI->getOperand(1); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) - return Context->getConstantInt(CI->getType(), Len-1); + return ConstantInt::get(CI->getType(), Len-1); // Handle strlen(p) != 0. if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0; @@ -875,7 +872,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization { //===---------------------------------------===// // 'strto*' Optimizations -struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization { +struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || @@ -897,18 +894,18 @@ struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization { //===---------------------------------------===// // 'memcmp' Optimizations -struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { +struct MemCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - FT->getReturnType() != Type::Int32Ty) + FT->getReturnType() != Type::getInt32Ty(*Context)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Context->getNullValue(CI->getType()); + return Constant::getNullValue(CI->getType()); // Make sure we have a constant length. ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); @@ -916,7 +913,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Context->getNullValue(CI->getType()); + return Constant::getNullValue(CI->getType()); if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); @@ -927,8 +924,8 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0 if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) { - const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ? - Type::Int16Ty : Type::Int32Ty); + const Type *PTy = PointerType::getUnqual(Len == 2 ? + Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context)); LHS = B.CreateBitCast(LHS, PTy, "tmp"); RHS = B.CreateBitCast(RHS, PTy, "tmp"); LoadInst *LHSV = B.CreateLoad(LHS, "lhsv"); @@ -944,13 +941,16 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { //===---------------------------------------===// // 'memcpy' Optimizations -struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization { +struct MemCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - FT->getParamType(2) != TD->getIntPtrType()) + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -962,25 +962,28 @@ struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization { //===---------------------------------------===// // 'memmove' Optimizations -struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization { +struct MemMoveOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - FT->getParamType(2) != TD->getIntPtrType()) + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) Module *M = Caller->getParent(); Intrinsic::ID IID = Intrinsic::memmove; const Type *Tys[1]; - Tys[0] = TD->getIntPtrType(); + Tys[0] = TD->getIntPtrType(*Context); Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1); Value *Dst = CastToCStr(CI->getOperand(1), B); Value *Src = CastToCStr(CI->getOperand(2), B); Value *Size = CI->getOperand(3); - Value *Align = Context->getConstantInt(Type::Int32Ty, 1); + Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); B.CreateCall4(MemMove, Dst, Src, Size, Align); return CI->getOperand(1); } @@ -989,17 +992,21 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization { //===---------------------------------------===// // 'memset' Optimizations -struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization { +struct MemSetOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || - FT->getParamType(1) != TD->getIntPtrType() || - FT->getParamType(2) != TD->getIntPtrType()) + !isa<IntegerType>(FT->getParamType(1)) || + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty); + Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + false); EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); return CI->getOperand(1); } @@ -1012,7 +1019,7 @@ struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization { //===---------------------------------------===// // 'pow*' Optimizations -struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { +struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the @@ -1021,40 +1028,44 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPoint()) return 0; - + Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 return Op1C; if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) - return EmitUnaryFloatFnCall(Op2, "exp2", B); + return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } - + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); if (Op2C == 0) return 0; - + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 - return Context->getConstantFP(CI->getType(), 1.0); - + return ConstantFP::get(CI->getType(), 1.0); + if (Op2C->isExactlyValue(0.5)) { - // FIXME: This is not safe for -0.0 and -inf. This can only be done when - // 'unsafe' math optimizations are allowed. - // x pow(x, 0.5) sqrt(x) - // --------------------------------------------- - // -0.0 +0.0 -0.0 - // -inf +inf NaN -#if 0 - // pow(x, 0.5) -> sqrt(x) - return B.CreateCall(get_sqrt(), Op1, "sqrt"); -#endif + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinite correctly. + // TODO: In fast-math mode, this could be just sqrt(x). + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, + Callee->getAttributes()); + Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, + Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp"); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp"); + return Sel; } - + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x return Op1; if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x - return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0), + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); return 0; } @@ -1063,7 +1074,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { //===---------------------------------------===// // 'exp2' Optimizations -struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { +struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1071,35 +1082,38 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPoint()) return 0; - + Value *Op = CI->getOperand(1); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); + LdExpArg = B.CreateSExt(OpC->getOperand(0), + Type::getInt32Ty(*Context), "tmp"); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); + LdExpArg = B.CreateZExt(OpC->getOperand(0), + Type::getInt32Ty(*Context), "tmp"); } if (LdExpArg) { const char *Name; - if (Op->getType() == Type::FloatTy) + if (Op->getType()->isFloatTy()) Name = "ldexpf"; - else if (Op->getType() == Type::DoubleTy) + else if (Op->getType()->isDoubleTy()) Name = "ldexp"; else Name = "ldexpl"; - Constant *One = Context->getConstantFP(APFloat(1.0f)); - if (Op->getType() != Type::FloatTy) - One = Context->getConstantExprFPExtend(One, Op->getType()); + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), Type::Int32Ty,NULL); + Op->getType(), + Type::getInt32Ty(*Context),NULL); CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1113,22 +1127,23 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { //===---------------------------------------===// // Double -> Float Shrinking Optimizations for Unary Functions like 'floor' -struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { +struct UnaryDoubleFPOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy || - FT->getParamType(0) != Type::DoubleTy) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) return 0; // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); - if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy) + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) return 0; // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B); - return B.CreateFPExt(V, Type::DoubleTy); + V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, Type::getDoubleTy(*Context)); } }; @@ -1139,54 +1154,56 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { //===---------------------------------------===// // 'ffs*' Optimizations -struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization { +struct FFSOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty || + if (FT->getNumParams() != 1 || + FT->getReturnType() != Type::getInt32Ty(*Context) || !isa<IntegerType>(FT->getParamType(0))) return 0; - + Value *Op = CI->getOperand(1); - + // Constant fold. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { if (CI->getValue() == 0) // ffs(0) -> 0. - return Context->getNullValue(CI->getType()); - return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1 + return Constant::getNullValue(CI->getType()); + return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1 CI->getValue().countTrailingZeros()+1); } - + // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 const Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, &ArgType, 1); Value *V = B.CreateCall(F, Op, "cttz"); - V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp"); - V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp"); - - Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp"); - return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0)); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); + V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp"); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); + return B.CreateSelect(Cond, V, + ConstantInt::get(Type::getInt32Ty(*Context), 0)); } }; //===---------------------------------------===// // 'isdigit' Optimizations -struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization { +struct IsDigitOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - FT->getParamType(0) != Type::Int32Ty) + FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getOperand(1); - Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'), + Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10), + Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), "isdigit"); return B.CreateZExt(Op, CI->getType()); } @@ -1195,58 +1212,58 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization { //===---------------------------------------===// // 'isascii' Optimizations -struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization { +struct IsAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - FT->getParamType(0) != Type::Int32Ty) + FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isascii(c) -> c <u 128 Value *Op = CI->getOperand(1); - Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128), + Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128), "isascii"); return B.CreateZExt(Op, CI->getType()); } }; - + //===---------------------------------------===// // 'abs', 'labs', 'llabs' Optimizations -struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization { +struct AbsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || FT->getParamType(0) != FT->getReturnType()) return 0; - + // abs(x) -> x >s -1 ? x : -x Value *Op = CI->getOperand(1); - Value *Pos = B.CreateICmpSGT(Op, - Context->getConstantIntAllOnesValue(Op->getType()), + Value *Pos = B.CreateICmpSGT(Op, + Constant::getAllOnesValue(Op->getType()), "ispos"); Value *Neg = B.CreateNeg(Op, "neg"); return B.CreateSelect(Pos, Op, Neg); } }; - + //===---------------------------------------===// // 'toascii' Optimizations -struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization { +struct ToAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != Type::Int32Ty) + FT->getParamType(0) != Type::getInt32Ty(*Context)) return 0; - + // isascii(c) -> c & 0x7f return B.CreateAnd(CI->getOperand(1), - Context->getConstantInt(CI->getType(),0x7F)); + ConstantInt::get(CI->getType(),0x7F)); } }; @@ -1257,15 +1274,15 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization { //===---------------------------------------===// // 'printf' Optimizations -struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { +struct PrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) || !(isa<IntegerType>(FT->getReturnType()) || - FT->getReturnType() == Type::VoidTy)) + FT->getReturnType()->isVoidTy())) return 0; - + // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) @@ -1273,39 +1290,39 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. - return CI->use_empty() ? (Value*)CI : - Context->getConstantInt(CI->getType(), 0); - + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 0); + // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B); - return CI->use_empty() ? (Value*)CI : - Context->getConstantInt(CI->getType(), 1); + EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B); + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 1); } - + // printf("foo\n") --> puts("foo") if (FormatStr[FormatStr.size()-1] == '\n' && FormatStr.find('%') == std::string::npos) { // no format characters. // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. FormatStr.erase(FormatStr.end()-1); - Constant *C = Context->getConstantArray(FormatStr, true); - C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage, - C, "str", Callee->getParent()); + Constant *C = ConstantArray::get(*Context, FormatStr, true); + C = new GlobalVariable(*Callee->getParent(), C->getType(), true, + GlobalVariable::InternalLinkage, C, "str"); EmitPutS(C, B); - return CI->use_empty() ? (Value*)CI : - Context->getConstantInt(CI->getType(), FormatStr.size()+1); + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); } - + // Optimize specific format strings. // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa<IntegerType>(CI->getOperand(2)->getType())) { EmitPutChar(CI->getOperand(2), B); - return CI->use_empty() ? (Value*)CI : - Context->getConstantInt(CI->getType(), 1); + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 1); } - + // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && isa<PointerType>(CI->getOperand(2)->getType()) && @@ -1320,7 +1337,7 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { //===---------------------------------------===// // 'sprintf' Optimizations -struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { +struct SPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed pointer arguments and an integer result. const FunctionType *FT = Callee->getFunctionType(); @@ -1333,7 +1350,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) return 0; - + // If we just have a format string (nothing else crazy) transform it. if (CI->getNumOperands() == 3) { // Make sure there's no % in the constant array. We could try to handle @@ -1341,41 +1358,49 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') return 0; // we found a format specifier, bail out. - + + // These optimizations require TargetData. + if (!TD) return 0; + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. - Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B); - return Context->getConstantInt(CI->getType(), FormatStr.size()); + ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); + return ConstantInt::get(CI->getType(), FormatStr.size()); } - + // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) return 0; - + // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char"); + Value *V = B.CreateTrunc(CI->getOperand(3), + Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul"); - B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr); - - return Context->getConstantInt(CI->getType(), 1); + Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), + "nul"); + B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); + + return ConstantInt::get(CI->getType(), 1); } - + if (FormatStr[1] == 's') { + // These optimizations require TargetData. + if (!TD) return 0; + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0; Value *Len = EmitStrLen(CI->getOperand(3), B); Value *IncLen = B.CreateAdd(Len, - Context->getConstantInt(Len->getType(), 1), + ConstantInt::get(Len->getType(), 1), "leninc"); EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); - + // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); } @@ -1386,7 +1411,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { //===---------------------------------------===// // 'fwrite' Optimizations -struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization { +struct FWriteOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require a pointer, an integer, an integer, a pointer, returning integer. const FunctionType *FT = Callee->getFunctionType(); @@ -1396,22 +1421,22 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization { !isa<PointerType>(FT->getParamType(3)) || !isa<IntegerType>(FT->getReturnType())) return 0; - + // Get the element size and count. ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2)); ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); - + // If this is writing zero records, remove the call (it's a noop). if (Bytes == 0) - return Context->getConstantInt(CI->getType(), 0); - + return ConstantInt::get(CI->getType(), 0); + // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); EmitFPutC(Char, CI->getOperand(4), B); - return Context->getConstantInt(CI->getType(), 1); + return ConstantInt::get(CI->getType(), 1); } return 0; @@ -1421,20 +1446,23 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization { //===---------------------------------------===// // 'fputs' Optimizations -struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization { +struct FPutsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + // Require two pointers. Also, we can't optimize if return value is used. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || !CI->use_empty()) return 0; - + // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getOperand(1)); if (!Len) return 0; EmitFWrite(CI->getOperand(1), - Context->getConstantInt(TD->getIntPtrType(), Len-1), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), CI->getOperand(2), B); return CI; // Known to have no uses (see above). } @@ -1443,7 +1471,7 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization { //===---------------------------------------===// // 'fprintf' Optimizations -struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { +struct FPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed paramters as pointers and integer result. const FunctionType *FT = Callee->getFunctionType(); @@ -1451,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { !isa<PointerType>(FT->getParamType(1)) || !isa<IntegerType>(FT->getReturnType())) return 0; - + // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) @@ -1462,26 +1490,29 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. - - EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(), + + // These optimizations require TargetData. + if (!TD) return 0; + + EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), CI->getOperand(1), B); - return Context->getConstantInt(CI->getType(), FormatStr.size()); + return ConstantInt::get(CI->getType(), FormatStr.size()); } - + // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) return 0; - + // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> *(i8*)dst = chr if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); - return Context->getConstantInt(CI->getType(), 1); + return ConstantInt::get(CI->getType(), 1); } - + if (FormatStr[1] == 's') { // fprintf(F, "%s", str) -> fputs(str, F) if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty()) @@ -1502,10 +1533,8 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { namespace { /// This pass optimizes well known library functions from libc and libm. /// - class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass { + class SimplifyLibCalls : public FunctionPass { StringMap<LibCallOptimization*> Optimizations; - // Miscellaneous LibCall Optimizations - ExitOpt Exit; // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; @@ -1536,7 +1565,6 @@ namespace { bool doInitialization(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } }; char SimplifyLibCalls::ID = 0; @@ -1547,15 +1575,12 @@ X("simplify-libcalls", "Simplify well-known library calls"); // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { - return new SimplifyLibCalls(); + return new SimplifyLibCalls(); } /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { - // Miscellaneous LibCall Optimizations - Optimizations["exit"] = &Exit; - // String and Memory LibCall Optimizations Optimizations["strcat"] = &StrCat; Optimizations["strncat"] = &StrNCat; @@ -1576,7 +1601,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; Optimizations["memset"] = &MemSet; - + // Math Library Optimizations Optimizations["powf"] = &Pow; Optimizations["pow"] = &Pow; @@ -1594,7 +1619,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f80"] = &Exp2; Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - + #ifdef HAVE_FLOORF Optimizations["floor"] = &UnaryDoubleFP; #endif @@ -1610,7 +1635,7 @@ void SimplifyLibCalls::InitOptimizations() { #ifdef HAVE_NEARBYINTF Optimizations["nearbyint"] = &UnaryDoubleFP; #endif - + // Integer Optimizations Optimizations["ffs"] = &FFS; Optimizations["ffsl"] = &FFS; @@ -1621,7 +1646,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["isdigit"] = &IsDigit; Optimizations["isascii"] = &IsAscii; Optimizations["toascii"] = &ToAscii; - + // Formatting and IO Optimizations Optimizations["sprintf"] = &SPrintF; Optimizations["printf"] = &PrintF; @@ -1636,10 +1661,10 @@ void SimplifyLibCalls::InitOptimizations() { bool SimplifyLibCalls::runOnFunction(Function &F) { if (Optimizations.empty()) InitOptimizations(); - - const TargetData &TD = getAnalysis<TargetData>(); - - IRBuilder<> Builder; + + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + + IRBuilder<> Builder(F.getContext()); bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -1647,37 +1672,35 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Ignore non-calls. CallInst *CI = dyn_cast<CallInst>(I++); if (!CI) continue; - + // Ignore indirect calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration() || !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage())) continue; - + // Ignore unknown calls. - const char *CalleeName = Callee->getNameStart(); - StringMap<LibCallOptimization*>::iterator OMI = - Optimizations.find(CalleeName, CalleeName+Callee->getNameLen()); - if (OMI == Optimizations.end()) continue; - + LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + if (!LCO) continue; + // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); - + // Try to optimize this call. - Value *Result = OMI->second->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, Builder); if (Result == 0) continue; - DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI; - DOUT << " into: " << *Result << "\n"); - + DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI; + errs() << " into: " << *Result << "\n"); + // Something changed! Changed = true; ++NumSimplified; - + // Inspect the instruction after the call (which was potentially just // added) next. I = CI; ++I; - + if (CI != Result && !CI->use_empty()) { CI->replaceAllUsesWith(Result); if (!Result->hasName()) @@ -1736,40 +1759,39 @@ bool SimplifyLibCalls::doInitialization(Module &M) { if (!F.isDeclaration()) continue; - unsigned NameLen = F.getNameLen(); - if (!NameLen) + if (!F.hasName()) continue; const FunctionType *FTy = F.getFunctionType(); - const char *NameStr = F.getNameStart(); - switch (NameStr[0]) { + StringRef Name = F.getName(); + switch (Name[0]) { case 's': - if (NameLen == 6 && !strcmp(NameStr, "strlen")) { + if (Name == "strlen") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) || - (NameLen == 6 && !strcmp(NameStr, "stpcpy")) || - (NameLen == 6 && !strcmp(NameStr, "strcat")) || - (NameLen == 6 && !strcmp(NameStr, "strtol")) || - (NameLen == 6 && !strcmp(NameStr, "strtod")) || - (NameLen == 6 && !strcmp(NameStr, "strtof")) || - (NameLen == 7 && !strcmp(NameStr, "strtoul")) || - (NameLen == 7 && !strcmp(NameStr, "strtoll")) || - (NameLen == 7 && !strcmp(NameStr, "strtold")) || - (NameLen == 7 && !strcmp(NameStr, "strncat")) || - (NameLen == 7 && !strcmp(NameStr, "strncpy")) || - (NameLen == 8 && !strcmp(NameStr, "strtoull"))) { + } else if (Name == "strcpy" || + Name == "stpcpy" || + Name == "strcat" || + Name == "strtol" || + Name == "strtod" || + Name == "strtof" || + Name == "strtoul" || + Name == "strtoll" || + Name == "strtold" || + Name == "strncat" || + Name == "strncpy" || + Name == "strtoull") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) { + } else if (Name == "strxfrm") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1777,13 +1799,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) || - (NameLen == 6 && !strcmp(NameStr, "strspn")) || - (NameLen == 7 && !strcmp(NameStr, "strncmp")) || - (NameLen == 7 && !strcmp(NameStr, "strcspn")) || - (NameLen == 7 && !strcmp(NameStr, "strcoll")) || - (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) || - (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) { + } else if (Name == "strcmp" || + Name == "strspn" || + Name == "strncmp" || + Name ==" strcspn" || + Name == "strcoll" || + Name == "strcasecmp" || + Name == "strncasecmp") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1792,31 +1814,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) || - (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) { + } else if (Name == "strstr" || + Name == "strpbrk") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) || - (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) { + } else if (Name == "strtok" || + Name == "strtok_r") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) || - (NameLen == 6 && !strcmp(NameStr, "setbuf")) || - (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) { + } else if (Name == "scanf" || + Name == "setbuf" || + Name == "setvbuf") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) || - (NameLen == 7 && !strcmp(NameStr, "strndup"))) { + } else if (Name == "strdup" || + Name == "strndup") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0))) @@ -1824,10 +1846,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); - } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) || - (NameLen == 6 && !strcmp(NameStr, "sscanf")) || - (NameLen == 7 && !strcmp(NameStr, "sprintf")) || - (NameLen == 7 && !strcmp(NameStr, "statvfs"))) { + } else if (Name == "stat" || + Name == "sscanf" || + Name == "sprintf" || + Name == "statvfs") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1835,7 +1857,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) { + } else if (Name == "snprintf") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(2))) @@ -1843,7 +1865,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 3); - } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) { + } else if (Name == "setitimer") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1)) || !isa<PointerType>(FTy->getParamType(2))) @@ -1851,7 +1873,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 2); setDoesNotCapture(F, 3); - } else if (NameLen == 6 && !strcmp(NameStr, "system")) { + } else if (Name == "system") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -1860,7 +1882,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'm': - if (NameLen == 6 && !strcmp(NameStr, "memcmp")) { + if (Name == "malloc") { + if (FTy->getNumParams() != 1 || + !isa<PointerType>(FTy->getReturnType())) + continue; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "memcmp") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1869,29 +1897,29 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) || - (NameLen == 7 && !strcmp(NameStr, "memrchr"))) { + } else if (Name == "memchr" || + Name == "memrchr") { if (FTy->getNumParams() != 3) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); - } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) || - (NameLen == 5 && !strcmp(NameStr, "modff")) || - (NameLen == 5 && !strcmp(NameStr, "modfl")) || - (NameLen == 6 && !strcmp(NameStr, "memcpy")) || - (NameLen == 7 && !strcmp(NameStr, "memccpy")) || - (NameLen == 7 && !strcmp(NameStr, "memmove"))) { + } else if (Name == "modf" || + Name == "modff" || + Name == "modfl" || + Name == "memcpy" || + Name == "memccpy" || + Name == "memmove") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) { + } else if (Name == "memalign") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotAlias(F, 0); - } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) || - (NameLen == 6 && !strcmp(NameStr, "mktime"))) { + } else if (Name == "mkdir" || + Name == "mktime") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -1900,7 +1928,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'r': - if (NameLen == 7 && !strcmp(NameStr, "realloc")) { + if (Name == "realloc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getReturnType())) @@ -1908,23 +1936,23 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); - } else if (NameLen == 4 && !strcmp(NameStr, "read")) { + } else if (Name == "read") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1))) continue; // May throw; "read" is a valid pthread cancellation point. setDoesNotCapture(F, 2); - } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) || - (NameLen == 6 && !strcmp(NameStr, "rewind")) || - (NameLen == 6 && !strcmp(NameStr, "remove")) || - (NameLen == 8 && !strcmp(NameStr, "realpath"))) { + } else if (Name == "rmdir" || + Name == "rewind" || + Name == "remove" || + Name == "realpath") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) || - (NameLen == 8 && !strcmp(NameStr, "readlink"))) { + } else if (Name == "rename" || + Name == "readlink") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1935,7 +1963,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'w': - if (NameLen == 5 && !strcmp(NameStr, "write")) { + if (Name == "write") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1))) continue; @@ -1944,7 +1972,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'b': - if (NameLen == 5 && !strcmp(NameStr, "bcopy")) { + if (Name == "bcopy") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1952,7 +1980,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) { + } else if (Name == "bcmp") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -1961,7 +1989,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setOnlyReadsMemory(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) { + } else if (Name == "bzero") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -1970,17 +1998,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'c': - if (NameLen == 6 && !strcmp(NameStr, "calloc")) { + if (Name == "calloc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) || - (NameLen == 5 && !strcmp(NameStr, "chown")) || - (NameLen == 7 && !strcmp(NameStr, "ctermid")) || - (NameLen == 8 && !strcmp(NameStr, "clearerr")) || - (NameLen == 8 && !strcmp(NameStr, "closedir"))) { + } else if (Name == "chmod" || + Name == "chown" || + Name == "ctermid" || + Name == "clearerr" || + Name == "closedir") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -1989,17 +2017,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'a': - if ((NameLen == 4 && !strcmp(NameStr, "atoi")) || - (NameLen == 4 && !strcmp(NameStr, "atol")) || - (NameLen == 4 && !strcmp(NameStr, "atof")) || - (NameLen == 5 && !strcmp(NameStr, "atoll"))) { + if (Name == "atoi" || + Name == "atol" || + Name == "atof" || + Name == "atoll") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "access")) { + } else if (Name == "access") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2008,7 +2036,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'f': - if (NameLen == 5 && !strcmp(NameStr, "fopen")) { + if (Name == "fopen") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0)) || @@ -2018,7 +2046,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) { + } else if (Name == "fdopen") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(1))) @@ -2026,52 +2054,52 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 2); - } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) || - (NameLen == 4 && !strcmp(NameStr, "free")) || - (NameLen == 5 && !strcmp(NameStr, "fseek")) || - (NameLen == 5 && !strcmp(NameStr, "ftell")) || - (NameLen == 5 && !strcmp(NameStr, "fgetc")) || - (NameLen == 6 && !strcmp(NameStr, "fseeko")) || - (NameLen == 6 && !strcmp(NameStr, "ftello")) || - (NameLen == 6 && !strcmp(NameStr, "fileno")) || - (NameLen == 6 && !strcmp(NameStr, "fflush")) || - (NameLen == 6 && !strcmp(NameStr, "fclose")) || - (NameLen == 7 && !strcmp(NameStr, "fsetpos")) || - (NameLen == 9 && !strcmp(NameStr, "flockfile")) || - (NameLen == 11 && !strcmp(NameStr, "funlockfile")) || - (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) { + } else if (Name == "feof" || + Name == "free" || + Name == "fseek" || + Name == "ftell" || + Name == "fgetc" || + Name == "fseeko" || + Name == "ftello" || + Name == "fileno" || + Name == "fflush" || + Name == "fclose" || + Name == "fsetpos" || + Name == "flockfile" || + Name == "funlockfile" || + Name == "ftrylockfile") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) { + } else if (Name == "ferror") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setOnlyReadsMemory(F); - } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) || - (NameLen == 5 && !strcmp(NameStr, "fstat")) || - (NameLen == 5 && !strcmp(NameStr, "frexp")) || - (NameLen == 6 && !strcmp(NameStr, "frexpf")) || - (NameLen == 6 && !strcmp(NameStr, "frexpl")) || - (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) { + } else if (Name == "fputc" || + Name == "fstat" || + Name == "frexp" || + Name == "frexpf" || + Name == "frexpl" || + Name == "fstatvfs") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) { + } else if (Name == "fgets") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(2))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 3); - } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) || - (NameLen == 6 && !strcmp(NameStr, "fwrite"))) { + } else if (Name == "fread" || + Name == "fwrite") { if (FTy->getNumParams() != 4 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(3))) @@ -2079,10 +2107,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 4); - } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) || - (NameLen == 6 && !strcmp(NameStr, "fscanf")) || - (NameLen == 7 && !strcmp(NameStr, "fprintf")) || - (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) { + } else if (Name == "fputs" || + Name == "fscanf" || + Name == "fprintf" || + Name == "fgetpos") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2093,31 +2121,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'g': - if ((NameLen == 4 && !strcmp(NameStr, "getc")) || - (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) || - (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) { + if (Name == "getc" || + Name == "getlogin_r" || + Name == "getc_unlocked") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) { + } else if (Name == "getenv") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) || - (NameLen == 7 && !strcmp(NameStr, "getchar"))) { + } else if (Name == "gets" || + Name == "getchar") { setDoesNotThrow(F); - } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) { + } else if (Name == "getitimer") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) { + } else if (Name == "getpwnam") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2126,22 +2154,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'u': - if (NameLen == 6 && !strcmp(NameStr, "ungetc")) { + if (Name == "ungetc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) || - (NameLen == 6 && !strcmp(NameStr, "unlink")) || - (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) { + } else if (Name == "uname" || + Name == "unlink" || + Name == "unsetenv") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) || - (NameLen == 6 && !strcmp(NameStr, "utimes"))) { + } else if (Name == "utime" || + Name == "utimes") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2152,30 +2180,30 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'p': - if (NameLen == 4 && !strcmp(NameStr, "putc")) { + if (Name == "putc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) || - (NameLen == 6 && !strcmp(NameStr, "printf")) || - (NameLen == 6 && !strcmp(NameStr, "perror"))) { + } else if (Name == "puts" || + Name == "printf" || + Name == "perror") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) || - (NameLen == 6 && !strcmp(NameStr, "pwrite"))) { + } else if (Name == "pread" || + Name == "pwrite") { if (FTy->getNumParams() != 4 || !isa<PointerType>(FTy->getParamType(1))) continue; // May throw; these are valid pthread cancellation points. setDoesNotCapture(F, 2); - } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) { + } else if (Name == "putchar") { setDoesNotThrow(F); - } else if (NameLen == 5 && !strcmp(NameStr, "popen")) { + } else if (Name == "popen") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0)) || @@ -2185,7 +2213,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) { + } else if (Name == "pclose") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2194,14 +2222,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'v': - if (NameLen == 6 && !strcmp(NameStr, "vscanf")) { + if (Name == "vscanf") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) || - (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) { + } else if (Name == "vsscanf" || + Name == "vfscanf") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1)) || !isa<PointerType>(FTy->getParamType(2))) @@ -2209,19 +2237,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) { + } else if (Name == "valloc") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) { + } else if (Name == "vprintf") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) || - (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) { + } else if (Name == "vfprintf" || + Name == "vsprintf") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2229,7 +2257,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) { + } else if (Name == "vsnprintf") { if (FTy->getNumParams() != 4 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(2))) @@ -2240,13 +2268,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'o': - if (NameLen == 4 && !strcmp(NameStr, "open")) { + if (Name == "open") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0))) continue; // May throw; "open" is a valid pthread cancellation point. setDoesNotCapture(F, 1); - } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) { + } else if (Name == "opendir") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0))) @@ -2257,12 +2285,12 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 't': - if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) { + if (Name == "tmpfile") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if (NameLen == 5 && !strcmp(NameStr, "times")) { + } else if (Name == "times") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2271,21 +2299,21 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'h': - if ((NameLen == 5 && !strcmp(NameStr, "htonl")) || - (NameLen == 5 && !strcmp(NameStr, "htons"))) { + if (Name == "htonl" || + Name == "htons") { setDoesNotThrow(F); setDoesNotAccessMemory(F); } break; case 'n': - if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) || - (NameLen == 5 && !strcmp(NameStr, "ntohs"))) { + if (Name == "ntohl" || + Name == "ntohs") { setDoesNotThrow(F); setDoesNotAccessMemory(F); } break; case 'l': - if (NameLen == 5 && !strcmp(NameStr, "lstat")) { + if (Name == "lstat") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2293,7 +2321,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) { + } else if (Name == "lchown") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2302,7 +2330,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'q': - if (NameLen == 5 && !strcmp(NameStr, "qsort")) { + if (Name == "qsort") { if (FTy->getNumParams() != 4 || !isa<PointerType>(FTy->getParamType(3))) continue; @@ -2311,8 +2339,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case '_': - if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) || - (NameLen == 9 && !strcmp(NameStr, "__strndup"))) { + if (Name == "__strdup" || + Name == "__strndup") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0))) @@ -2320,19 +2348,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); - } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) { + } else if (Name == "__strtok_r") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) { + } else if (Name == "_IO_getc") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) { + } else if (Name == "_IO_putc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; @@ -2341,16 +2369,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 1: - if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) { + if (Name == "\1__isoc99_scanf") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) || - (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) || - (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) || - (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) { + } else if (Name == "\1stat64" || + Name == "\1lstat64" || + Name == "\1statvfs64" || + Name == "\1__isoc99_sscanf") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2358,7 +2386,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) { + } else if (Name == "\1fopen64") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0)) || @@ -2368,26 +2396,26 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) || - (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) { + } else if (Name == "\1fseeko64" || + Name == "\1ftello64") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) { + } else if (Name == "\1tmpfile64") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) || - (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) { + } else if (Name == "\1fstat64" || + Name == "\1fstatvfs64") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) { + } else if (Name == "\1open64") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0))) continue; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index c037ee9..68689d6 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -30,8 +30,8 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallPtrSet.h" #include <map> @@ -45,7 +45,7 @@ TailDupThreshold("taildup-threshold", cl::init(1), cl::Hidden); namespace { - class VISIBILITY_HIDDEN TailDup : public FunctionPass { + class TailDup : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -128,7 +128,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI, // other instructions. if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; - // Allso alloca and malloc. + // Also alloca and malloc. if (isa<AllocationInst>(I)) return false; // Some vector instructions can expand into a number of instructions. @@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { BasicBlock *DestBlock = Branch->getSuccessor(0); assert(SourceBlock != DestBlock && "Our predicate is broken!"); - DOUT << "TailDuplication[" << SourceBlock->getParent()->getName() - << "]: Eliminating branch: " << *Branch; + DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName() + << "]: Eliminating branch: " << *Branch); // See if we can avoid duplicating code by moving it up to a dominator of both // blocks. if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { - DOUT << "Found shared dominator: " << DomBlock->getName() << "\n"; + DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n"); // If there are non-phi instructions in DestBlock that have no operands // defined in DestBlock, and if the instruction has no side effects, we can @@ -258,7 +258,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { while (!isa<TerminatorInst>(BBI)) { Instruction *I = BBI++; - bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects(); + bool CanHoist = I->isSafeToSpeculativelyExecute() && + !I->mayReadFromMemory(); if (CanHoist) { for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) @@ -271,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { // Remove from DestBlock, move right before the term in DomBlock. DestBlock->getInstList().remove(I); DomBlock->getInstList().insert(DomBlock->getTerminator(), I); - DOUT << "Hoisted: " << *I; + DEBUG(errs() << "Hoisted: " << *I); } } } @@ -358,7 +359,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Instruction *Inst = BI++; if (isInstructionTriviallyDead(Inst)) Inst->eraseFromParent(); - else if (Constant *C = ConstantFoldInstruction(Inst)) { + else if (Constant *C = ConstantFoldInstruction(Inst, + Inst->getContext())) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 34ee57c..b56e170 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -60,14 +60,13 @@ #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { - struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass { + struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(&ID) {} @@ -394,7 +393,7 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); - BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry); + BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 71049fa..135a621 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -19,17 +19,18 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::PatternMatch; -void ExtAddrMode::print(OStream &OS) const { +void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; if (BaseGV) { OS << (NeedPlus ? " + " : "") << "GV:"; - WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false); + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); NeedPlus = true; } @@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const { if (BaseReg) { OS << (NeedPlus ? " + " : "") << "Base:"; - WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false); + WriteAsOperand(OS, BaseReg, /*PrintType=*/false); NeedPlus = true; } if (Scale) { OS << (NeedPlus ? " + " : "") << Scale << "*"; - WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false); + WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); NeedPlus = true; } @@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const { } void ExtAddrMode::dump() const { - print(cerr); - cerr << '\n'; + print(errs()); + errs() << '\n'; } @@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (!RHS) return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) - Scale = 1 << Scale; + Scale = 1LL << Scale; return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 6d1180d..4931ab3 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Constant.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -23,6 +24,8 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" #include <algorithm> using namespace llvm; @@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { Value *RetVal = 0; // Create a value to return... if the function doesn't return null... - if (BB->getParent()->getReturnType() != Type::VoidTy) + if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext())) RetVal = Constant::getNullValue(BB->getParent()->getReturnType()); // Create the return... - NewTI = ReturnInst::Create(RetVal); + NewTI = ReturnInst::Create(TI->getContext(), RetVal); } break; @@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { case Instruction::Switch: // Should remove entry default: case Instruction::Ret: // Cannot happen, has no successors! - assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!"); - abort(); + llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!"); } if (NewTI) // If it's a different instruction, replace. @@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); - // The new block lives in whichever loop the old one did. + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>()) if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); @@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and -/// DominanceFrontier, but no other analyses. +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. +/// In particular, it does not preserve LoopSimplify (because it's +/// complicated to handle the case where one of the edges being split +/// is an exit of a loop with other exits). +/// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. - BasicBlock *NewBB = - BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, + BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); + LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; + Loop *L = LI ? LI->getLoopFor(BB) : 0; + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) + // While here, if we need to preserve loop analyses, collect + // some information about how this split will affect loops. + bool HasLoopExit = false; + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + for (unsigned i = 0; i != NumPreds; ++i) { Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - + + if (LI) { + // If we need to preserve LCSSA, determine if any of + // the preds is a loop exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Preds[i])) + if (!PL->contains(BB)) + HasLoopExit = true; + // If we need to preserve LoopInfo, note whether any of the + // preds crosses an interesting loop boundary. + if (L) { + if (L->contains(Preds[i])) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + } + } + // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; - - + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to @@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } + + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + + if (L) { + if (IsLoopEntry) { + if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { + // Add the new block to the nearest enclosing loop (and not an + // adjacent loop). + while (PredLoop && !PredLoop->contains(BB)) + PredLoop = PredLoop->getParentLoop(); + if (PredLoop) + PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } + } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we - // don't need to create a new PHI node. - Value *InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1; i != NumPreds; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; - break; - } - + // don't need to create a new PHI node, unless it's needed for LCSSA. + Value *InVal = 0; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1; i != NumPreds; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + } + if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old @@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); - - // Check to see if we can eliminate this phi node. - if (Value *V = PN->hasConstantValue(DT != 0)) { - Instruction *I = dyn_cast<Instruction>(V); - if (!I || DT == 0 || DT->dominates(I, PN)) { - PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); - PN->eraseFromParent(); - } - } } return NewBB; @@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - // Test if the values come form identical arithmetic instructions. + // Test if the values come from identical arithmetic instructions. + // Use isIdenticalToWhenDefined instead of isIdenticalTo because + // this function is only used when one address use dominates the + // other, which means that they'll always either have the same + // value or one of them will have an undefined value. if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) || isa<GetElementPtrInst>(A)) if (const Instruction *BI = dyn_cast<Instruction>(B)) - if (cast<Instruction>(A)->isIdenticalTo(BI)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; // Otherwise they may not be equivalent. @@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, unsigned AccessSize = 0; if (AA) { const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy); + AccessSize = AA->getTypeStoreSize(AccessTy); } while (ScanFrom != ScanBB->begin()) { diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index 1650cfa..4b720b1 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "basicinliner" - #include "llvm/Module.h" #include "llvm/Function.h" #include "llvm/Transforms/Utils/BasicInliner.h" @@ -21,6 +20,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include <vector> @@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() { } } - DOUT << ": " << CallSites.size() << " call sites.\n"; + DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); // Inline call sites. bool Changed = false; @@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() { } InlineCost IC = CA.getInlineCost(CS, NeverInline); if (IC.isAlways()) { - DOUT << " Inlining: cost=always" - <<", call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost=always" + <<", call: " << *CS.getInstruction()); } else if (IC.isNever()) { - DOUT << " NOT Inlining: cost=never" - <<", call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=never" + <<", call: " << *CS.getInstruction()); continue; } else { int Cost = IC.getValue(); if (Cost >= (int) BasicInlineThreshold) { - DOUT << " NOT Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); continue; } else { - DOUT << " Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); } } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c4fd1ea..849b2b5 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -21,11 +21,13 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -43,6 +45,7 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<DominanceFrontier>(); AU.addPreserved<LoopInfo>(); + AU.addPreserved<ProfileInfo>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, return false; } +/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form +/// may require new PHIs in the new exit block. This function inserts the +/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB +/// is the new loop exit block, and DestBB is the old loop exit, now the +/// successor of SplitBB. +static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, + BasicBlock *SplitBB, + BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() && + "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block... + for (BasicBlock::iterator I = DestBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned Idx = PN->getBasicBlockIndex(SplitBB); + Value *V = PN->getIncomingValue(Idx); + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = PHINode::Create(PN->getType(), "split", + SplitBB->getTerminator()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + NewPN->addIncoming(V, Preds[i]); + // Update the original PHI. + PN->setIncomingValue(Idx, NewPN); + } +} + /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree and /// DominatorFrontier information if it is available, thus calling this pass @@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, /// false otherwise. This ensures that all edges to that dest go to one block /// instead of each going to a different block. // -bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, - bool MergeIdenticalEdges) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false; +BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + Pass *P, bool MergeIdenticalEdges) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. - BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." + - DestBB->getName() + "_crit_edge"); + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch... BranchInst::Create(DestBB, NewBB); @@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // If we don't have a pass object, we can't update anything... - if (P == 0) return true; + if (P == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! - assert(0 && "Requiring domfrontiers but not idom/domtree/domset." - " not implemented yet!"); + llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." + " not implemented yet!"); } // Since the new block is dominated by its only predecessor TIBB, @@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // Update LoopInfo if it is around. if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { - // If one or the other blocks were not in a loop, the new block is not - // either, and thus LI doesn't need to be updated. - if (Loop *TIL = LI->getLoopFor(TIBB)) + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. @@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, P->addBasicBlockToLoop(NewBB, LI->getBase()); } } + // If TIBB is in a loop and DestBB is outside of that loop, split the + // other exit blocks of the loop that also have predecessors outside + // the loop, to maintain a LoopSimplify guarantee. + if (!TIL->contains(DestBB) && + P->mustPreserveAnalysisID(LoopSimplifyID)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. + if (P->mustPreserveAnalysisID(LCSSAID)) { + SmallVector<BasicBlock *, 1> OrigPred; + OrigPred.push_back(TIBB); + CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); + } + + // For each unique exit block... + SmallVector<BasicBlock *, 4> ExitBlocks; + TIL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + // Collect all the preds that are inside the loop, and note + // whether there are any preds outside the loop. + SmallVector<BasicBlock *, 4> Preds; + bool HasPredOutsideOfLoop = false; + BasicBlock *Exit = ExitBlocks[i]; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); + I != E; ++I) + if (TIL->contains(*I)) + Preds.push_back(*I); + else + HasPredOutsideOfLoop = true; + // If there are any preds not in the loop, we'll need to split + // the edges. The Preds.empty() check is needed because a block + // may appear multiple times in the list. We can't use + // getUniqueExitBlocks above because that depends on LoopSimplify + // form, which we're in the process of restoring! + if (!Preds.empty() && HasPredOutsideOfLoop) { + BasicBlock *NewExitBB = + SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), + "split", P); + if (P->mustPreserveAnalysisID(LCSSAID)) + CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); + } + } + } + // LCSSA form was updated above for the case where LoopSimplify is + // available, which means that all predecessors of loop exit blocks + // are within the loop. Without LoopSimplify form, it would be + // necessary to insert a new phi. + assert((!P->mustPreserveAnalysisID(LCSSAID) || + P->mustPreserveAnalysisID(LoopSimplifyID)) && + "SplitCriticalEdge doesn't know how to update LCCSA form " + "without LoopSimplify!"); + } } - return true; + + // Update ProfileInfo if it is around. + if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { + PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); + } + + return NewBB; } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 10cae5c..f4394ea 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils CloneFunction.cpp CloneLoop.cpp CloneModule.cpp - CloneTrace.cpp CodeExtractor.cpp DemoteRegToStack.cpp - InlineCost.cpp InlineFunction.cpp + InstructionNamer.cpp LCSSA.cpp Local.cpp LoopSimplify.cpp @@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils LowerSwitch.cpp Mem2Reg.cpp PromoteMemoryToRegister.cpp - SimplifyCFG.cpp + SSAUpdater.cpp SSI.cpp + SimplifyCFG.cpp UnifyFunctionExitNodes.cpp UnrollLoop.cpp ValueMapper.cpp - InstructionNamer.cpp ) target_link_libraries (LLVMTransformUtils LLVMSupport) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index d0fdefa..30130fa 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, DenseMap<const Value*, Value*> &ValueMap, const char *NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { - BasicBlock *NewBB = BasicBlock::Create("", F); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, - std::vector<ReturnInst*> &Returns, + SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F, ValueMap[I] = DestI++; // Add mapping to ValueMap } - std::vector<ReturnInst*> Returns; // Ignore returns cloned... + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); return NewF; } @@ -179,7 +180,7 @@ namespace { Function *NewFunc; const Function *OldFunc; DenseMap<const Value*, Value*> &ValueMap; - std::vector<ReturnInst*> &Returns; + SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; @@ -187,7 +188,7 @@ namespace { public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, DenseMap<const Value*, Value*> &valueMap, - std::vector<ReturnInst*> &returns, + SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) @@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Nope, clone it now. BasicBlock *NewBB; - BBEntry = NewBB = BasicBlock::Create(); + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner. if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) { if (DbgFnStart == NULL) { - DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram())); + DISubprogram SP(DFSI->getSubprogram()); if (SP.describes(BB->getParent())) DbgFnStart = DFSI->getSubprogram(); } @@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, /// mapping its operands through ValueMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { + LLVMContext &Context = I->getContext(); + SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - ValueMap))) + ValueMap, + Context))) Ops.push_back(Op); else return 0; // All operands not constant! if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Ops[0], Ops.size(), TD); + &Ops[0], Ops.size(), + Context, TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) @@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) { CE); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], - Ops.size(), TD); + Ops.size(), Context, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) { /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, - std::vector<ReturnInst*> &Returns, + SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD) { assert(NameSuffix && "NameSuffix cannot be null!"); + LLVMContext &Context = OldFunc->getContext(); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), @@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. - std::vector<const PHINode*> PHIToResolve; + SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); @@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { - Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); + Value *InVal = MapValue(PN->getIncomingValue(pred), + ValueMap, Context); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 82f5b93..0285f8c 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M, // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(), + GlobalVariable *GV = new GlobalVariable(*New, + I->getType()->getElementType(), false, GlobalValue::ExternalLinkage, 0, - I->getName(), New); + I->getName()); GV->setAlignment(I->getAlignment()); ValueMap[I] = GV; } @@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M, GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - ValueMap))); + ValueMap, + M->getContext()))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M, ValueMap[J] = DestI++; } - std::vector<ReturnInst*> Returns; // Ignore returns cloned... + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(F, I, ValueMap, Returns); } @@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M, GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); + GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext()))); } return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 6d5904e..c39ccf7 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -18,6 +18,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" @@ -27,6 +28,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> #include <set> @@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { void CodeExtractor::splitReturnBlocks() { for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(), E = BlocksToExtract.end(); I != E; ++I) - if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) - (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { + BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (DT) { + // Old dominates New. New node domiantes all other nodes dominated + //by Old. + DomTreeNode *OldNode = DT->getNode(*I); + SmallVector<DomTreeNode*, 8> Children; + for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); + DI != DE; ++DI) + Children.push_back(*DI); + + DomTreeNode *NewNode = DT->addNewBlock(New, *I); + + for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + DT->changeImmediateDominator(*I, NewNode); + } + } } // findInputsOutputs - Find inputs to, outputs from the code region. @@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs, BasicBlock *newHeader, Function *oldFunction, Module *M) { - DOUT << "inputs: " << inputs.size() << "\n"; - DOUT << "outputs: " << outputs.size() << "\n"; + DEBUG(errs() << "inputs: " << inputs.size() << "\n"); + DEBUG(errs() << "outputs: " << outputs.size() << "\n"); // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { case 0: - case 1: RetTy = Type::VoidTy; break; - case 2: RetTy = Type::Int1Ty; break; - default: RetTy = Type::Int16Ty; break; + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; } std::vector<const Type*> paramTy; @@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs, for (Values::const_iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) { const Value *value = *i; - DOUT << "value used in func: " << *value << "\n"; + DEBUG(errs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. for (Values::const_iterator I = outputs.begin(), E = outputs.end(); I != E; ++I) { - DOUT << "instr used in func: " << **I << "\n"; + DEBUG(errs() << "instr used in func: " << **I << "\n"); if (AggregateArgs) paramTy.push_back((*I)->getType()); else paramTy.push_back(PointerType::getUnqual((*I)->getType())); } - DOUT << "Function type: " << *RetTy << " f("; + DEBUG(errs() << "Function type: " << *RetTy << " f("); for (std::vector<const Type*>::iterator i = paramTy.begin(), e = paramTy.end(); i != e; ++i) - DOUT << **i << ", "; - DOUT << ")\n"; + DEBUG(errs() << **i << ", "); + DEBUG(errs() << ")\n"); if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy)); + PointerType *StructPtr = + PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); paramTy.clear(); paramTy.push_back(StructPtr); } - const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); + const FunctionType *funcType = + FunctionType::get(RetTy, paramTy, false); // Create the new function Function *newFunction = Function::Create(funcType, @@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs, Value *RewriteVal; if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, i); - std::string GEPname = "gep_" + inputs[i]->getName(); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2, - GEPname, TI); - RewriteVal = new LoadInst(GEP, "load" + GEPname, TI); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(AI, Idx, Idx+2, + "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs, return newFunction; } +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { + for (Value::use_iterator UI = Used->use_begin(), + UE = Used->use_end(); UI != UE; ++UI) { + PHINode *P = dyn_cast<PHINode>(*UI); + if (P && P->getParent() == BB) + return P->getIncomingBlock(UI); + } + + return 0; +} + /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. @@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Values &inputs, Values &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector<Value*> params, StructValues, ReloadOutputs; + std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + + LLVMContext &Context = newFunction->getContext(); // Add inputs as params, or to be filled into the struct for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) @@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ArgTypes.push_back((*v)->getType()); // Allocate a struct at the beginning of this function - Type *StructArgTy = StructType::get(ArgTypes); + Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = new AllocaInst(StructArgTy, 0, "structArg", codeReplacer->getParent()->begin()->begin()); @@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, i); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); GetElementPtrInst *GEP = GetElementPtrInst::Create(Struct, Idx, Idx + 2, "gep_" + StructValues[i]->getName()); @@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Output = 0; if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create(Struct, Idx, Idx + 2, "gep_reload_" + outputs[i]->getName()); @@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Output = ReloadOutputs[i]; } LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + Reloads.push_back(load); codeReplacer->getInstList().push_back(load); std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { @@ -434,7 +472,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Now we can emit a switch statement using the call as a value. SwitchInst *TheSwitch = - SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty), + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); // Since there may be multiple exits from the original region, make the new @@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, if (!NewTarget) { // If we don't already have an exit stub for this non-extracted // destination, create one now! - NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub", + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", newFunction); unsigned SuccNum = switchVal++; @@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, case 0: case 1: break; // No value needed. case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::Int1Ty, !SuccNum); + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); break; default: - brVal = ConstantInt::get(Type::Int16Ty, SuccNum); + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); break; } - ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget); + ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum), + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), OldTarget); // Restore values just before we exit @@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, DominatesDef = false; } - if (DT) + if (DT) { DominatesDef = DT->dominates(DefBlock, OldTarget); + + // If the output value is used by a phi in the target block, + // then we need to test for dominance of the phi's predecessor + // instead. Unfortunately, this a little complicated since we + // have already rewritten uses of the value to uses of the reload. + BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], + OldTarget); + if (pred && DT && DT->dominates(DefBlock, pred)) + DominatesDef = true; + } if (DominatesDef) { if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), + FirstOut+out); GetElementPtrInst *GEP = GetElementPtrInst::Create(OAI, Idx, Idx + 2, "gep_" + outputs[out]->getName(), @@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // this should be rewritten as a `ret' // Check if the function should return a value - if (OldFnRetTy == Type::VoidTy) { - ReturnInst::Create(0, TheSwitch); // Return void + if (OldFnRetTy == Type::getVoidTy(Context)) { + ReturnInst::Create(Context, 0, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have - ReturnInst::Create(TheSwitch->getCondition(), TheSwitch); + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. - ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch); + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); } TheSwitch->eraseFromParent(); @@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { Function *oldFunction = header->getParent(); // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction, + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, header); // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot"); + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); // Find inputs to, outputs from the code region. @@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { // cerr << "OLD FUNCTION: " << *oldFunction; // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) abort()); + DEBUG(if (verifyFunction(*newFunction)) + llvm_report_error("verifyFunction failed!")); return newFunction; } diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index b8dd754..c908b4a 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint); + Slot = new AllocaInst(I.getType(), 0, + I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", @@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint); + Slot = new AllocaInst(P->getType(), 0, + P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 4989c00..0d00d69 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -28,13 +29,73 @@ #include "llvm/Support/CallSite.h" using namespace llvm; -bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) { - return InlineFunction(CallSite(CI), CG, TD); +bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD, + SmallVectorImpl<AllocaInst*> *StaticAllocas) { + return InlineFunction(CallSite(CI), CG, TD, StaticAllocas); } -bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) { - return InlineFunction(CallSite(II), CG, TD); +bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD, + SmallVectorImpl<AllocaInst*> *StaticAllocas) { + return InlineFunction(CallSite(II), CG, TD, StaticAllocas); } + +/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into +/// an invoke, we have to turn all of the calls that can throw into +/// invokes. This function analyze BB to see if there are any calls, and if so, +/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI +/// nodes in that block with the values specified in InvokeDestPHIValues. +/// +static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, + BasicBlock *InvokeDest, + const SmallVectorImpl<Value*> &InvokeDestPHIValues) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *I = BBI++; + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast<CallInst>(I); + if (CI == 0) continue; + + // If this call cannot unwind, don't convert it to an invoke. + if (CI->doesNotThrow()) + continue; + + // Convert this function call into an invoke instruction. + // First, split the basic block. + BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Next, create the new invoke instruction, inserting it at the end + // of the old basic block. + SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, + InvokeArgs.begin(), InvokeArgs.end(), + CI->getName(), BB->getTerminator()); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present. + CI->replaceAllUsesWith(II); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + Split->getInstList().pop_front(); // Delete the original call + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa<PHINode>(I); ++I, ++i) + cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB); + + // This basic block is now complete, the caller will continue scanning the + // next one. + return; + } +} + + /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes and turn unwind /// instructions into branches to the invoke unwind dest. @@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) { /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo, - CallGraph *CG) { + ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); - std::vector<Value*> InvokeDestPHIValues; + SmallVector<Value*, 8> InvokeDestPHIValues; // If there are PHI nodes in the unwind destination block, we need to // keep track of which values came into them from this invoke, then remove @@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. - if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) { - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) { - if (InlinedCodeInfo.ContainsCalls) { - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){ - Instruction *I = BBI++; - - // We only need to check for function calls: inlined invoke - // instructions require no special handling. - if (!isa<CallInst>(I)) continue; - CallInst *CI = cast<CallInst>(I); - - // If this call cannot unwind, don't convert it to an invoke. - if (CI->doesNotThrow()) - continue; - - // Convert this function call into an invoke instruction. - // First, split the basic block. - BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); - - // Next, create the new invoke instruction, inserting it at the end - // of the old basic block. - SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); - InvokeInst *II = - InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, - InvokeArgs.begin(), InvokeArgs.end(), - CI->getName(), BB->getTerminator()); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - - // Make sure that anything using the call now uses the invoke! - CI->replaceAllUsesWith(II); - - // Update the callgraph. - if (CG) { - // We should be able to do this: - // (*CG)[Caller]->replaceCallSite(CI, II); - // but that fails if the old call site isn't in the call graph, - // which, because of LLVM bug 3601, it sometimes isn't. - CallGraphNode *CGN = (*CG)[Caller]; - for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end(); - NI != NE; ++NI) { - if (NI->first == CI) { - NI->first = II; - break; - } - } - } - - // Delete the unconditional branch inserted by splitBasicBlock - BB->getInstList().pop_back(); - Split->getInstList().pop_front(); // Delete the original call - - // Update any PHI nodes in the exceptional block to indicate that - // there is now a new entry in them. - unsigned i = 0; - for (BasicBlock::iterator I = InvokeDest->begin(); - isa<PHINode>(I); ++I, ++i) { - PHINode *PN = cast<PHINode>(I); - PN->addIncoming(InvokeDestPHIValues[i], BB); - } - - // This basic block is now complete, start scanning the next one. - break; - } - } - - if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - // An UnwindInst requires special handling when it gets inlined into an - // invoke site. Once this happens, we know that the unwind would cause - // a control transfer to the invoke exception destination, so we can - // transform it into a direct branch to the exception destination. - BranchInst::Create(InvokeDest, UI); - - // Delete the unwind instruction! - UI->eraseFromParent(); - - // Update any PHI nodes in the exceptional block to indicate that - // there is now a new entry in them. - unsigned i = 0; - for (BasicBlock::iterator I = InvokeDest->begin(); - isa<PHINode>(I); ++I, ++i) { - PHINode *PN = cast<PHINode>(I); - PN->addIncoming(InvokeDestPHIValues[i], BB); - } + // rewrite. If the code doesn't have calls or unwinds, we know there is + // nothing to rewrite. + if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) { + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); + return; + } + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + if (InlinedCodeInfo.ContainsCalls) + HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest, + InvokeDestPHIValues); + + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // An UnwindInst requires special handling when it gets inlined into an + // invoke site. Once this happens, we know that the unwind would cause + // a control transfer to the invoke exception destination, so we can + // transform it into a direct branch to the exception destination. + BranchInst::Create(InvokeDest, UI); + + // Delete the unwind instruction! + UI->eraseFromParent(); + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa<PHINode>(I); ++I, ++i) { + PHINode *PN = cast<PHINode>(I); + PN->addIncoming(InvokeDestPHIValues[i], BB); } } } @@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS, } for (; I != E; ++I) { - const Instruction *OrigCall = I->first.getInstruction(); + const Value *OrigCall = I->first; DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI != ValueMap.end() && VMI->second) { - // If the call was inlined, but then constant folded, there is no edge to - // add. Check for this case. - if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) - CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); - } + if (VMI == ValueMap.end() || VMI->second == 0) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) + CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); } + // Update the call graph by deleting the edge from Callee to Caller. We must // do this after the loop above in case Caller and Callee are the same. CallerNode->removeCallEdgeFor(CS); @@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS, /// findFnRegionEndMarker - This is a utility routine that is used by /// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds /// to the llvm.dbg.func.start of the function F. Otherwise return NULL. +/// static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { - GlobalVariable *FnStart = NULL; + MDNode *FnStart = NULL; const DbgRegionEndInst *FnEnd = NULL; for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (FnStart == NULL) { if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) { - DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram())); + DISubprogram SP(FSI->getSubprogram()); assert (SP.isNull() == false && "Invalid llvm.dbg.func.start"); if (SP.describes(F)) - FnStart = SP.getGV(); + FnStart = SP.getNode(); } - } else { - if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI)) - if (REI->getContext() == FnStart) - FnEnd = REI; + continue; } + + if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI)) + if (REI->getContext() == FnStart) + FnEnd = REI; } return FnEnd; } @@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // -bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { +bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, + SmallVectorImpl<AllocaInst*> *StaticAllocas) { Instruction *TheCall = CS.getInstruction(); + LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { // Make sure to capture all of the return instructions from the cloned // function. - std::vector<ReturnInst*> Returns; + SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; @@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast<PointerType>(I->getType())->getElementType(); - const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtrTy = + Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; if (TD) Align = TD->getPrefTypeAlignment(AggTy); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), - Caller->begin()->begin()); + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, + I->getName(), + &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[] = { Type::Int64Ty }; + const Type *Tys[] = { Type::getInt64Ty(Context) }; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, Tys, 1); @@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { if (TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else - Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy)); + Size = ConstantInt::get(Type::getInt64Ty(Context), + TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { - DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1) + DestCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1) }; CallInst *TheMemCpy = CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); @@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { // call site. The function body cloner does not clone original // region end marker from the CalledFunc. This will ensure that // inlined function's scope ends at the right place. - const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc); - if (DREI) { - for (BasicBlock::iterator BI = TheCall, - BE = TheCall->getParent()->end(); BI != BE; ++BI) { + if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) { + for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end(); + BI != BE; ++BI) { if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) { if (DbgRegionEndInst *NewDREI = - dyn_cast<DbgRegionEndInst>(DREI->clone())) + dyn_cast<DbgRegionEndInst>(DREI->clone())) NewDREI->insertAfter(DSPI); break; } @@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), - E = FirstNewBlock->end(); I != E; ) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) { - // If the alloca is now dead, remove it. This often occurs due to code - // specialization. - if (AI->use_empty()) { - AI->eraseFromParent(); - continue; - } + E = FirstNewBlock->end(); I != E; ) { + AllocaInst *AI = dyn_cast<AllocaInst>(I++); + if (AI == 0) continue; + + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } - if (isa<Constant>(AI->getArraySize())) { - // Scan for the block of allocas that we can move over, and move them - // all at once. - while (isa<AllocaInst>(I) && - isa<Constant>(cast<AllocaInst>(I)->getArraySize())) - ++I; - - // Transfer all of the allocas over in a block. Using splice means - // that the instructions aren't removed from the symbol table, then - // reinserted. - Caller->getEntryBlock().getInstList().splice( - InsertPoint, - FirstNewBlock->getInstList(), - AI, I); - } + if (!isa<Constant>(AI->getArraySize())) + continue; + + // Keep track of the static allocas that we inline into the caller if the + // StaticAllocas pointer is non-null. + if (StaticAllocas) StaticAllocas->push_back(AI); + + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { + if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I)); + ++I; } + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().getInstList().splice(InsertPoint, + FirstNewBlock->getInstList(), + AI, I); + } } // If the inlined code contained dynamic alloca instructions, wrap the inlined @@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { BB != E; ++BB) { TerminatorInst *Term = BB->getTerminator(); if (isa<UnwindInst>(Term)) { - new UnreachableInst(Term); + new UnreachableInst(Context, Term); BB->getInstList().erase(Term); } } @@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { // any inlined 'unwind' instructions into branches to the invoke exception // destination, and call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG); + HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 4f8a160..1fa51a3 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -32,7 +32,7 @@ namespace { bool runOnFunction(Function &F) { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->hasName() && AI->getType() != Type::VoidTy) + if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext())) AI->setName("tmp"); for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -40,7 +40,7 @@ namespace { BB->setName("BB"); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->hasName() && I->getType() != Type::VoidTy) + if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext())) I->setName("tmp"); } return true; diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index d5e7303..56e662e 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -33,22 +33,19 @@ #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" -#include <algorithm> -#include <map> using namespace llvm; STATISTIC(NumLCSSA, "Number of live out of a loop variables"); namespace { - struct VISIBILITY_HIDDEN LCSSA : public LoopPass { + struct LCSSA : public LoopPass { static char ID; // Pass identification, replacement for typeid LCSSA() : LoopPass(&ID) {} @@ -57,12 +54,10 @@ namespace { DominatorTree *DT; std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; + Loop *L; virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - void ProcessInstruction(Instruction* Instr, - const SmallVector<BasicBlock*, 8>& exitBlocks); - /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. It maintains both of these, /// as well as the CFG. It also requires dominator information. @@ -71,9 +66,9 @@ namespace { AU.setPreservesCFG(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); + AU.addRequiredTransitive<LoopInfo>(); AU.addPreserved<LoopInfo>(); - AU.addRequired<DominatorTree>(); + AU.addRequiredTransitive<DominatorTree>(); AU.addPreserved<ScalarEvolution>(); AU.addPreserved<DominatorTree>(); @@ -85,15 +80,17 @@ namespace { AU.addPreserved<DominanceFrontier>(); } private: - void getLoopValuesUsedOutsideLoop(Loop *L, - SetVector<Instruction*> &AffectedValues, - const SmallVector<BasicBlock*, 8>& exitBlocks); - - Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, - DenseMap<DomTreeNode*, Value*> &Phis); + bool ProcessInstruction(Instruction *Inst, + const SmallVectorImpl<BasicBlock*> &ExitBlocks); + + /// verifyAnalysis() - Verify loop nest. + virtual void verifyAnalysis() const { + // Check the special guarantees that LCSSA makes. + assert(L->isLCSSAForm() && "LCSSA form not preserved!"); + } /// inLoop - returns true if the given block is within the current loop - bool inLoop(BasicBlock* B) { + bool inLoop(BasicBlock *B) const { return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B); } }; @@ -105,181 +102,163 @@ static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass"); Pass *llvm::createLCSSAPass() { return new LCSSA(); } const PassInfo *const llvm::LCSSAID = &X; + +/// BlockDominatesAnExit - Return true if the specified block dominates at least +/// one of the blocks in the specified list. +static bool BlockDominatesAnExit(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &ExitBlocks, + DominatorTree *DT) { + DomTreeNode *DomNode = DT->getNode(BB); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i]))) + return true; + + return false; +} + + /// runOnFunction - Process all loops in the function, inner-most out. -bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) { - PredCache.clear(); +bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { + L = TheLoop; LI = &LPM.getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTree>(); - // Speed up queries by creating a sorted list of blocks + // Get the set of exiting blocks. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + if (ExitBlocks.empty()) + return false; + + // Speed up queries by creating a sorted vector of blocks. LoopBlocks.clear(); LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); - std::sort(LoopBlocks.begin(), LoopBlocks.end()); + array_pod_sort(LoopBlocks.begin(), LoopBlocks.end()); - SmallVector<BasicBlock*, 8> exitBlocks; - L->getExitBlocks(exitBlocks); + // Look at all the instructions in the loop, checking to see if they have uses + // outside the loop. If so, rewrite those uses. + bool MadeChange = false; - SetVector<Instruction*> AffectedValues; - getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks); + for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); + BBI != E; ++BBI) { + BasicBlock *BB = *BBI; + + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) + continue; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I->use_empty() || + (I->hasOneUse() && I->use_back()->getParent() == BB && + !isa<PHINode>(I->use_back()))) + continue; + + MadeChange |= ProcessInstruction(I, ExitBlocks); + } + } - // If no values are affected, we can save a lot of work, since we know that - // nothing will be changed. - if (AffectedValues.empty()) - return false; + assert(L->isLCSSAForm()); + PredCache.clear(); + + return MadeChange; +} + +/// isExitBlock - Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] == BB) + return true; + return false; +} + +/// ProcessInstruction - Given an instruction in the loop, check to see if it +/// has any uses that are outside the current loop. If so, insert LCSSA PHI +/// nodes and rewrite the uses. +bool LCSSA::ProcessInstruction(Instruction *Inst, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + SmallVector<Use*, 16> UsesToRewrite; - // Iterate over all affected values for this loop and insert Phi nodes - // for them in the appropriate exit blocks + BasicBlock *InstBB = Inst->getParent(); - for (SetVector<Instruction*>::iterator I = AffectedValues.begin(), - E = AffectedValues.end(); I != E; ++I) - ProcessInstruction(*I, exitBlocks); + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(*UI)) + UserBB = PN->getIncomingBlock(UI); + + if (InstBB != UserBB && !inLoop(UserBB)) + UsesToRewrite.push_back(&UI.getUse()); + } - assert(L->isLCSSAForm()); + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) return false; - return true; -} - -/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes, -/// eliminate all out-of-loop uses. -void LCSSA::ProcessInstruction(Instruction *Instr, - const SmallVector<BasicBlock*, 8>& exitBlocks) { ++NumLCSSA; // We are applying the transformation - // Keep track of the blocks that have the value available already. - DenseMap<DomTreeNode*, Value*> Phis; - - BasicBlock *DomBB = Instr->getParent(); - // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB dominates // the value, so adjust DomBB to the normal destination block, which is // effectively where the value is first usable. - if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr)) + BasicBlock *DomBB = Inst->getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT->getNode(DomBB); - // Insert the LCSSA phi's into the exit blocks (dominated by the value), and - // add them to the Phi's map. - for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(), - BBE = exitBlocks.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = *BBI; - DomTreeNode *ExitBBNode = DT->getNode(BB); - Value *&Phi = Phis[ExitBBNode]; - if (!Phi && DT->dominates(DomNode, ExitBBNode)) { - PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa", - BB->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(BB)); - - // Remember that this phi makes the value alive in this block. - Phi = PN; - - // Add inputs from inside the loop for this PHI. - for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI) - PN->addIncoming(Instr, *PI); - } - } + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(Inst); - - // Record all uses of Instr outside the loop. We need to rewrite these. The - // LCSSA phis won't be included because they use the value in the loop. - for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end(); - UI != E;) { - BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(*UI)) { - UserBB = P->getIncomingBlock(UI); - } + // Insert the LCSSA phi's into all of the exit blocks dominated by the + // value., and add them to the Phi's map. + for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), + BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { + BasicBlock *ExitBB = *BBI; + if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; - // If the user is in the loop, don't rewrite it! - if (UserBB == Instr->getParent() || inLoop(UserBB)) { - ++UI; - continue; - } + // If we already inserted something for this BB, don't reprocess it. + if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - // Otherwise, patch up uses of the value with the appropriate LCSSA Phi, - // inserting PHI nodes into join points where needed. - Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis); - - // Preincrement the iterator to avoid invalidating it when we change the - // value. - Use &U = UI.getUse(); - ++UI; - U.set(Val); - } -} + PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa", + ExitBB->begin()); + PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); -/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that -/// are used by instructions outside of it. -void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L, - SetVector<Instruction*> &AffectedValues, - const SmallVector<BasicBlock*, 8>& exitBlocks) { - // FIXME: For large loops, we may be able to avoid a lot of use-scanning - // by using dominance information. In particular, if a block does not - // dominate any of the loop exits, then none of the values defined in the - // block could be used outside the loop. - for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); - BB != BE; ++BB) { - for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I) - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; - ++UI) { - BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); - if (PHINode* p = dyn_cast<PHINode>(*UI)) { - UserBB = p->getIncomingBlock(UI); - } - - if (*BB != UserBB && !inLoop(UserBB)) { - AffectedValues.insert(I); - break; - } - } + // Add inputs from inside the loop for this PHI. + for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) + PN->addIncoming(Inst, *PI); + + // Remember that this phi makes the value alive in this block. + SSAUpdate.AddAvailableValue(ExitBB, PN); } -} - -/// GetValueForBlock - Get the value to use within the specified basic block. -/// available values are in Phis. -Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, - DenseMap<DomTreeNode*, Value*> &Phis) { - // If there is no dominator info for this BB, it is unreachable. - if (BB == 0) - return UndefValue::get(OrigInst->getType()); - - // If we have already computed this value, return the previously computed val. - if (Phis.count(BB)) return Phis[BB]; - - DomTreeNode *IDom = BB->getIDom(); + + // Rewrite all uses outside the loop in terms of the new PHIs we just + // inserted. + for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses in + // the same block. It assumes the PHI we inserted is at the end of the + // block. + Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); - // Otherwise, there are two cases: we either have to insert a PHI node or we - // don't. We need to insert a PHI node if this block is not dominated by one - // of the exit nodes from the loop (the loop could have multiple exits, and - // though the value defined *inside* the loop dominated all its uses, each - // exit by itself may not dominate all the uses). - // - // The simplest way to check for this condition is by checking to see if the - // idom is in the loop. If so, we *know* that none of the exit blocks - // dominate this block. Note that we *know* that the block defining the - // original instruction is in the idom chain, because if it weren't, then the - // original value didn't dominate this use. - if (!inLoop(IDom->getBlock())) { - // Idom is not in the loop, we must still be "below" the exit block and must - // be fully dominated by the value live in the idom. - Value* val = GetValueForBlock(IDom, OrigInst, Phis); - Phis.insert(std::make_pair(BB, val)); - return val; + if (isa<PHINode>(UserBB->begin()) && + isExitBlock(UserBB, ExitBlocks)) { + UsesToRewrite[i]->set(UserBB->begin()); + continue; + } + + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UsesToRewrite[i]); } - BasicBlock *BBN = BB->getBlock(); - - // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so - // now, then get values to fill in the incoming values for the PHI. - PHINode *PN = PHINode::Create(OrigInst->getType(), - OrigInst->getName() + ".lcssa", BBN->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(BBN)); - Phis.insert(std::make_pair(BB, PN)); - - // Fill in the incoming values for the block. - for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI) - PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI); - return PN; + return true; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8c08638..b622611 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,9 +20,11 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" @@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { } else if (SI->getNumSuccessors() == 2) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. - Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(), - SI->getSuccessorValue(1), "cond", SI); + Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(), + SI->getSuccessorValue(1), "cond"); // Insert the new branch... BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI); @@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { /// too, recursively. void llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { - // We can remove a PHI if it is on a cycle in the def-use graph // where each node in the cycle has degree one, i.e. only one use, // and is an instruction with no side effects. @@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. /// -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) { // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); + if (P) { + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PI) { + PI->replaceAllUses(PredBB, DestBB); + PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB)); + } + } // Nuke BB. PredBB->eraseFromParent(); } diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index d6b167f..c22708a 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,10 +37,12 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" @@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); namespace { - struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass { + struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass { static char ID; // Pass identification, replacement for typeid - LoopSimplify() : FunctionPass(&ID) {} + LoopSimplify() : LoopPass(&ID) {} // AA - If we have an alias analysis object to update, this is it, otherwise // this is null. AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; - virtual bool runOnFunction(Function &F); + Loop *L; + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // We need loop information to identify the loops... - AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<DominatorTree>(); AU.addPreserved<LoopInfo>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<DominanceFrontier>(); AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } /// verifyAnalysis() - Verify loop nest. void verifyAnalysis() const { -#ifndef NDEBUG - LoopInfo *NLI = &getAnalysis<LoopInfo>(); - for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I) - (*I)->verifyLoop(); -#endif + assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!"); } private: - bool ProcessLoop(Loop *L); + bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - void InsertPreheaderForLoop(Loop *L); - Loop *SeparateNestedLoop(Loop *L); - void InsertUniqueBackedgeBlock(Loop *L); + BasicBlock *InsertPreheaderForLoop(Loop *L); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, Loop *L); @@ -105,73 +105,19 @@ X("loopsimplify", "Canonicalize natural loops", true); // Publically exposed interface to pass... const PassInfo *const llvm::LoopSimplifyID = &X; -FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// runOnFunction - Run down all loops in the CFG (recursively, but we could do /// it in any convenient order) inserting preheaders... /// -bool LoopSimplify::runOnFunction(Function &F) { +bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { + L = l; bool Changed = false; LI = &getAnalysis<LoopInfo>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); - // Check to see that no blocks (other than the header) in loops have - // predecessors that are not in loops. This is not valid for natural loops, - // but can occur if the blocks are unreachable. Since they are unreachable we - // can just shamelessly destroy their terminators to make them not branch into - // the loop! - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // This case can only occur for unreachable blocks. Blocks that are - // unreachable can't be in loops, so filter those blocks out. - if (LI->getLoopFor(BB)) continue; - - bool BlockUnreachable = false; - TerminatorInst *TI = BB->getTerminator(); - - // Check to see if any successors of this block are non-loop-header loops - // that are not the header. - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - // If this successor is not in a loop, BB is clearly ok. - Loop *L = LI->getLoopFor(TI->getSuccessor(i)); - if (!L) continue; - - // If the succ is the loop header, and if L is a top-level loop, then this - // is an entrance into a loop through the header, which is also ok. - if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0) - continue; - - // Otherwise, this is an entrance into a loop from some place invalid. - // Either the loop structure is invalid and this is not a natural loop (in - // which case the compiler is buggy somewhere else) or BB is unreachable. - BlockUnreachable = true; - break; - } - - // If this block is ok, check the next one. - if (!BlockUnreachable) continue; - - // Otherwise, this block is dead. To clean up the CFG and to allow later - // loop transformations to ignore this case, we delete the edges into the - // loop by replacing the terminator. - - // Remove PHI entries from the successors. - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - TI->getSuccessor(i)->removePredecessor(BB); - - // Add a new unreachable instruction before the old terminator. - new UnreachableInst(TI); - - // Delete the dead terminator. - if (AA) AA->deleteValue(TI); - if (!TI->use_empty()) - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - TI->eraseFromParent(); - Changed |= true; - } - - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= ProcessLoop(*I); + Changed |= ProcessLoop(L, LPM); return Changed; } @@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) { /// ProcessLoop - Walk the loop structure in depth first order, ensuring that /// all loops have preheaders. /// -bool LoopSimplify::ProcessLoop(Loop *L) { +bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) { bool Changed = false; ReprocessLoop: - - // Canonicalize inner loops before outer loops. Inner loop canonicalization - // can provide work for the outer loop to canonicalize. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= ProcessLoop(*I); - - assert(L->getBlocks()[0] == L->getHeader() && - "Header isn't first block in loop?"); + + // Check to see that no blocks (other than the header) in this loop that has + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock *, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) + if (!L->contains(*PI)) + BadPreds.insert(*PI); + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), + E = BadPreds.end(); I != E; ++I) { + // Inform each successor of each dead pred. + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + (*SI)->removePredecessor(*I); + // Zap the dead pred's terminator and replace it with unreachable. + TerminatorInst *TI = (*I)->getTerminator(); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + (*I)->getTerminator()->eraseFromParent(); + new UnreachableInst((*I)->getContext(), *I); + Changed = true; + } + } // Does the loop already have a preheader? If so, don't insert one. - if (L->getLoopPreheader() == 0) { - InsertPreheaderForLoop(L); + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L); NumInserted++; Changed = true; } @@ -229,10 +196,9 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (NumBackedges < 8) { - if (Loop *NL = SeparateNestedLoop(L)) { + if (SeparateNestedLoop(L, LPM)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. - ProcessLoop(NL); Changed = true; // GCC doesn't tail recursion eliminate this. goto ReprocessLoop; @@ -242,7 +208,7 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. - InsertUniqueBackedgeBlock(L); + InsertUniqueBackedgeBlock(L, Preheader); NumInserted++; Changed = true; } @@ -253,7 +219,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = PN->hasConstantValue()) { + if (Value *V = PN->hasConstantValue(DT)) { if (AA) AA->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -286,19 +252,10 @@ ReprocessLoop: Instruction *Inst = I++; if (Inst == CI) continue; - if (Inst->isTrapping()) { + if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) { AllInvariant = false; break; } - for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j) - if (!L->isLoopInvariant(Inst->getOperand(j))) { - AllInvariant = false; - break; - } - if (!AllInvariant) - break; - // Hoist. - Inst->moveBefore(L->getLoopPreheader()->getTerminator()); } if (!AllInvariant) continue; @@ -317,9 +274,10 @@ ReprocessLoop: DomTreeNode *Node = DT->getNode(ExitingBlock); const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = Node->getChildren(); - for (unsigned k = 0, g = Children.size(); k != g; ++k) { - DT->changeImmediateDominator(Children[k], Node->getIDom()); - if (DF) DF->changeImmediateDominator(Children[k]->getBlock(), + while (!Children.empty()) { + DomTreeNode *Child = Children.front(); + DT->changeImmediateDominator(Child, Node->getIDom()); + if (DF) DF->changeImmediateDominator(Child->getBlock(), Node->getIDom()->getBlock(), DT); } @@ -339,7 +297,7 @@ ReprocessLoop: /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -void LoopSimplify::InsertPreheaderForLoop(Loop *L) { +BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *Header = L->getHeader(); // Compute the set of predecessors of the loop that are not in the loop. @@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *NewBB = SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), ".preheader", this); - - - //===--------------------------------------------------------------------===// - // Update analysis results now that we have performed the transformation - // - - // We know that we have loop information to update... update it now. - if (Loop *Parent = L->getParentLoop()) - Parent->addBasicBlockToLoop(NewBB, LI->getBase()); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + + return NewBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { LoopBlocks.size(), ".loopexit", this); - // Update Loop Information - we know that the new block will be in whichever - // loop the Exit block is in. Note that it may not be in that immediate loop, - // if the successor is some other loop header. In that case, we continue - // walking up the loop tree to find a loop that contains both the successor - // block and the predecessor block. - Loop *SuccLoop = LI->getLoopFor(Exit); - while (SuccLoop && !SuccLoop->contains(L->getHeader())) - SuccLoop = SuccLoop->getParentLoop(); - if (SuccLoop) - SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - return NewBB; } @@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = PN->hasConstantValue()) - if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) { - // This is a degenerate PHI already, don't modify it! - PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); - PN->eraseFromParent(); - continue; - } + if (Value *V = PN->hasConstantValue(DT)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + continue; + } // Scan this PHI node looking for a use of the PHI node by itself. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); if (PN == 0) return 0; // No known way to partition. @@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { else LI->changeTopLevelLoop(L, NewOuter); - // This block is going to be our new header block: add it to this loop and all - // parent loops. - NewOuter->addBasicBlockToLoop(NewBB, LI->getBase()); - // L is now a subloop of our outer loop. NewOuter->addChildLoop(L); + // Add the new loop to the pass manager queue. + LPM.insertLoopIntoQueue(NewOuter); + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) NewOuter->addBlockEntry(*I); + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set<BasicBlock*> BlocksInL; @@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// -void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { +void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop - BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); @@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { if (*I != Preheader) BackedgeBlocks.push_back(*I); // Create and insert the new backedge block... - BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F); + BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), + Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); // Move the new backedge block to right after the last backedge block. diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 74e7028..f26d7c1 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -19,6 +19,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Constants.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetData.h" @@ -28,17 +29,17 @@ using namespace llvm; STATISTIC(NumLowered, "Number of allocations lowered"); namespace { - /// LowerAllocations - Turn malloc and free instructions into %malloc and - /// %free calls. + /// LowerAllocations - Turn malloc and free instructions into @malloc and + /// @free calls. /// class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass { - Constant *MallocFunc; // Functions in the module we are processing - Constant *FreeFunc; // Initialized by doInitialization + Constant *FreeFunc; // Functions in the module we are processing + // Initialized by doInitialization bool LowerMallocArgToInteger; public: static char ID; // Pass ID, replacement for typeid explicit LowerAllocations(bool LowerToInt = false) - : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0), + : BasicBlockPass(&ID), FreeFunc(0), LowerMallocArgToInteger(LowerToInt) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { // This function is always successful. // bool LowerAllocations::doInitialization(Module &M) { - const Type *BPTy = PointerType::getUnqual(Type::Int8Ty); - // Prototype malloc as "char* malloc(...)", because we don't know in - // doInitialization whether size_t is int or long. - FunctionType *FT = FunctionType::get(BPTy, true); - MallocFunc = M.getOrInsertFunction("malloc", FT); - FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0); + const Type *BPTy = Type::getInt8PtrTy(M.getContext()); + FreeFunc = M.getOrInsertFunction("free" , Type::getVoidTy(M.getContext()), + BPTy, (Type *)0); return true; } @@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) { // bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { bool Changed = false; - assert(MallocFunc && FreeFunc && "Pass not initialized!"); + assert(FreeFunc && "Pass not initialized!"); BasicBlock::InstListType &BBIL = BB.getInstList(); const TargetData &TD = getAnalysis<TargetData>(); - const Type *IntPtrTy = TD.getIntPtrType(); + const Type *IntPtrTy = TD.getIntPtrType(BB.getContext()); // Loop over all of the instructions, looking for malloc or free instructions for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { if (MallocInst *MI = dyn_cast<MallocInst>(I)) { - const Type *AllocTy = MI->getType()->getElementType(); - - // malloc(type) becomes i8 *malloc(size) - Value *MallocArg; - if (LowerMallocArgToInteger) - MallocArg = ConstantInt::get(Type::Int64Ty, - TD.getTypeAllocSize(AllocTy)); - else - MallocArg = ConstantExpr::getSizeOf(AllocTy); - MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg), - IntPtrTy); - - if (MI->isArrayAllocation()) { - if (isa<ConstantInt>(MallocArg) && - cast<ConstantInt>(MallocArg)->isOne()) { - MallocArg = MI->getOperand(0); // Operand * 1 = Operand - } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) { - CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/); - MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg)); - } else { - Value *Scale = MI->getOperand(0); - if (Scale->getType() != IntPtrTy) - Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/, - "", I); - - // Multiply it by the array size if necessary... - MallocArg = BinaryOperator::Create(Instruction::Mul, Scale, - MallocArg, "", I); - } - } - - // Create the call to Malloc. - CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I); - MCall->setTailCall(); - - // Create a cast instruction to convert to the right type... - Value *MCast; - if (MCall->getType() != Type::VoidTy) - MCast = new BitCastInst(MCall, MI->getType(), "", I); - else - MCast = Constant::getNullValue(MI->getType()); + Value *ArraySize = MI->getOperand(0); + if (ArraySize->getType() != IntPtrTy) + ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, + false /*ZExt*/, "", I); + Value *MCast = CallInst::CreateMalloc(I, IntPtrTy, + MI->getAllocatedType(), ArraySize); // Replace all uses of the old malloc inst with the cast inst MI->replaceAllUsesWith(MCast); @@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) { Value *PtrCast = new BitCastInst(FI->getOperand(0), - PointerType::getUnqual(Type::Int8Ty), "", I); + Type::getInt8PtrTy(BB.getContext()), "", I); // Insert a call to the free function... CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 1f6b1a2..9a3de26 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -40,6 +40,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { // doInitialization - Make sure that there is a prototype for abort in the // current module. bool LowerInvoke::doInitialization(Module &M) { - const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtrTy = + Type::getInt8PtrTy(M.getContext()); AbortMessage = 0; if (ExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. @@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) { { // The type is recursive, so use a type holder. std::vector<const Type*> Elements; Elements.push_back(JmpBufTy); - OpaqueType *OT = OpaqueType::get(); + OpaqueType *OT = OpaqueType::get(M.getContext()); Elements.push_back(PointerType::getUnqual(OT)); - PATypeHolder JBLType(StructType::get(Elements)); + PATypeHolder JBLType(StructType::get(M.getContext(), Elements)); OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle. JBLinkTy = JBLType.get(); M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); @@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) { // Now that we've done that, insert the jmpbuf list head global, unless it // already exists. if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { - JBListHead = new GlobalVariable(PtrJBList, false, + JBListHead = new GlobalVariable(M, PtrJBList, false, GlobalValue::LinkOnceAnyLinkage, Constant::getNullValue(PtrJBList), - "llvm.sjljeh.jblist", &M); + "llvm.sjljeh.jblist"); } // VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>, @@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) { } // We need the 'write' and 'abort' functions for both models. - AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0); + AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), + (Type *)0); #if 0 // "write" is Unix-specific.. code is going away soon anyway. WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, VoidPtrTy, Type::Int32Ty, (Type *)0); @@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) { // The abort message for expensive EH support tells the user that the // program 'unwound' without an 'invoke' instruction. Constant *Msg = - ConstantArray::get("ERROR: Exception thrown, but not caught!\n"); + ConstantArray::get(M->getContext(), + "ERROR: Exception thrown, but not caught!\n"); AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, GlobalValue::InternalLinkage, - Msg, "abortmsg", M); - std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + Msg, "abortmsg"); + std::vector<Constant*> GEPIdx(2, + Constant::getNullValue(Type::getInt32Ty(M->getContext()))); AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); } else { // The abort message for cheap EH support tells the user that EH is not // enabled. Constant *Msg = - ConstantArray::get("Exception handler needed, but not enabled. Recompile" - " program with -enable-correct-eh-support.\n"); + ConstantArray::get(M->getContext(), + "Exception handler needed, but not enabled." + "Recompile program with -enable-correct-eh-support.\n"); AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, GlobalValue::InternalLinkage, - Msg, "abortmsg", M); - std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + Msg, "abortmsg"); + std::vector<Constant*> GEPIdx(2, Constant::getNullValue( + Type::getInt32Ty(M->getContext()))); AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); } } @@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { // Insert a return instruction. This really should be a "barrier", as it // is unreachable. - ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 : - Constant::getNullValue(F.getReturnType()), UI); + ReturnInst::Create(F.getContext(), + F.getReturnType() == Type::getVoidTy(F.getContext()) ? + 0 : Constant::getNullValue(F.getReturnType()), UI); // Remove the unwind instruction now. BB->getInstList().erase(UI); @@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, SwitchInst *CatchSwitch) { - ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo); + ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); // If the unwind edge has phi nodes, split the edge. if (isa<PHINode>(II->getUnwindDest()->begin())) { @@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); // nonvolatile. - new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI); + new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), + InvokeNum, false, NI); // Add a switch case to our unwind block. CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); @@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // alloca because the value needs to be live across invokes. unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; AllocaInst *JmpBuf = - new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); + new AllocaInst(JBLinkTy, 0, Align, + "jblink", F.begin()->begin()); std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::Int32Ty)); - Idx.push_back(ConstantInt::get(Type::Int32Ty, 1)); + Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); + Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1)); OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), - "OldBuf", EntryBB->getTerminator()); + "OldBuf", + EntryBB->getTerminator()); // Copy the JBListHead to the alloca. Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, @@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create the catch block. The catch block is basically a big switch // statement that goes to all of the invoke catch blocks. - BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F); + BasicBlock *CatchBB = + BasicBlock::Create(F.getContext(), "setjmp.catch", &F); // Create an alloca which keeps track of which invoke is currently // executing. For normal calls it contains zero. - AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum", - EntryBB->begin()); - new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true, - EntryBB->getTerminator()); + AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, + "invokenum",EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + InvokeNum, true, EntryBB->getTerminator()); // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action // for a standard call). - BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F); - Unwinds.push_back(new UnwindInst(UnwindBB)); + BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB)); Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); SwitchInst *CatchSwitch = @@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "setjmp.cont"); - Idx[1] = ConstantInt::get(Type::Int32Ty, 0); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), "TheJmpBuf", EntryBB->getTerminator()); - JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty), + JmpBufPtr = new BitCastInst(JmpBufPtr, + Type::getInt8PtrTy(F.getContext()), "tmp", EntryBB->getTerminator()); Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", EntryBB->getTerminator()); // Compare the return value to zero. - Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, SJRet, Constant::getNullValue(SJRet->getType()), - "notunwind", EntryBB->getTerminator()); + "notunwind"); // Nuke the uncond branch. EntryBB->getTerminator()->eraseFromParent(); @@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create three new blocks, the block to load the jmpbuf ptr and compare // against null, the block to do the longjmp, and the error block for if it // is null. Add them at the end of the function because they are not hot. - BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F); - BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F); - BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F); + BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), + "dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); // If this function contains an invoke, restore the old jumpbuf ptr. Value *BufPtr; @@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { } // Load the JBList, if it's null, then there was no catch! - Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, + Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, Constant::getNullValue(BufPtr->getType()), - "notnull", UnwindHandler); + "notnull"); BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); // Create the block to do the longjmp. // Get a pointer to the jmpbuf and longjmp. std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::Int32Ty)); - Idx.push_back(ConstantInt::get(Type::Int32Ty, 0)); + Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); + Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)); Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", UnwindBlock); - Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty), + Idx[0] = new BitCastInst(Idx[0], + Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); - Idx[1] = ConstantInt::get(Type::Int32Ty, 1); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); - new UnreachableInst(UnwindBlock); + new UnreachableInst(F.getContext(), UnwindBlock); // Set up the term block ("throw without a catch"). - new UnreachableInst(TermBlock); + new UnreachableInst(F.getContext(), TermBlock); // Insert a new call to write(2, AbortMessage, AbortMessageLength); writeAbortMessage(TermBlock->getTerminator()); diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1da5936..764f098 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) { // operator<< - Used for debugging purposes. // -static std::ostream& operator<<(std::ostream &O, - const LowerSwitch::CaseVector &C) { +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) { O << "["; for (LowerSwitch::CaseVector::const_iterator B = C.begin(), @@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O, return O << "]"; } -static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) { - if (O.stream()) *O.stream() << C; - return O; -} - // switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. // @@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); - DOUT << "LHS: " << LHS << "\n"; + DEBUG(errs() << "LHS: " << LHS << "\n"); std::vector<CaseRange> RHS(Begin + Mid, End); - DOUT << "RHS: " << RHS << "\n"; + DEBUG(errs() << "RHS: " << RHS << "\n"); CaseRange& Pivot = *(Begin + Mid); DEBUG(errs() << "Pivot ==> " @@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. Function* F = OrigBlock->getParent(); - BasicBlock* NewNode = BasicBlock::Create("NodeBlock"); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; @@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* Default) { Function* F = OrigBlock->getParent(); - BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock"); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewLeaf); @@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, ICmpInst* Comp = NULL; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... - Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, + Leaf.Low, "SwitchLeaf"); } else { // Make range comparison if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi - Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi - Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); @@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, Val->getName()+".off", NewLeaf); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); - Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); } } @@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - BasicBlock* NewDefault = BasicBlock::Create("NewDefault"); + BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); F->getBasicBlockList().insert(Default, NewDefault); BranchInst::Create(Default, NewDefault); @@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); - DOUT << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << "\n"; - DOUT << "Cases: " << Cases << "\n"; + DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + DEBUG(errs() << "Cases: " << Cases << "\n"); + (void)numCmps; BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, OrigBlock, NewDefault); diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 2b06d77..5df0832 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + PromoteMemToReg(Allocas, DT, DF, F.getContext()); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index b717699..9ca06bd 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,13 +23,13 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -41,7 +41,6 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -// Provide DenseMapInfo for all pointers. namespace llvm { template<> struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { @@ -181,6 +180,8 @@ namespace { /// AST - An AliasSetTracker object to update. If null, don't update it. /// AliasSetTracker *AST; + + LLVMContext &Context; /// AllocaLookup - Reverse mapping of Allocas. /// @@ -212,8 +213,9 @@ namespace { DenseMap<const BasicBlock*, unsigned> BBNumPreds; public: PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), DT(dt), DF(df), AST(ast) {} + DominanceFrontier &df, AliasSetTracker *ast, + LLVMContext &C) + : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {} void run(); @@ -291,10 +293,9 @@ namespace { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); - U != E;) { - Instruction *User = cast<Instruction>(*U); - ++U; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast<Instruction>(*UI++); if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { // Remove any uses of this alloca in DbgInfoInstrinsics. assert(BC->hasOneUse() && "Unexpected alloca uses!"); @@ -303,7 +304,8 @@ namespace { BC->eraseFromParent(); continue; } - else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Remember the basic blocks which define new values for the alloca DefiningBlocks.push_back(SI->getParent()); AllocaPointerVal = SI->getOperand(0); @@ -491,17 +493,14 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = PN->hasConstantValue(true)) { - if (!isa<Instruction>(V) || - properlyDominates(cast<Instruction>(V), PN)) { - if (AST && isa<PointerType>(PN->getType())) - AST->deleteValue(PN); - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - NewPhiNodes.erase(I++); - EliminatedAPHI = true; - continue; - } + if (Value *V = PN->hasConstantValue(&DT)) { + if (AST && isa<PointerType>(PN->getType())) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; } ++I; } @@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, LiveInBlockWorklist.pop_back(); --i, --e; break; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + } + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (LI->getOperand(0) != AI) continue; // Okay, we found a load before a store to the alloca. It is actually @@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, } } +namespace { /// StoreIndexSearchPredicate - This is a helper predicate used to search by the /// first element of a pair. @@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate { } }; +} + /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over @@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), - Allocas[AllocaNo]->getName() + "." + - utostr(Version++), BB->begin()); + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; PN->reserveOperandSpace(getNumPreds(BB)); @@ -995,9 +999,9 @@ NextIteration: /// void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + LLVMContext &Context, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST).run(); + PromoteMem2Reg(Allocas, DT, DF, AST, Context).run(); } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 0000000..780ee26 --- /dev/null +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,335 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; +typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > + IncomingPredInfoTy; + +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { + return *static_cast<IncomingPredInfoTy*>(IPI); +} + + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) + : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { + delete &getAvailableVals(AV); + delete &getIncomingPredInfo(IPI); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates. ProtoValue is the value used to name PHI nodes. +void SSAUpdater::Initialize(Value *ProtoValue) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + + if (IPI == 0) + IPI = new IncomingPredInfoTy(); + else + getIncomingPredInfo(IPI).clear(); + PrototypeValue = ProtoValue; +} + +/// HasValueForBlock - Return true if the SSAUpdater already has a value for +/// the specified block. +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. +void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { + assert(PrototypeValue != 0 && "Need to initialize SSAUpdater"); + assert(PrototypeValue->getType() == V->getType() && + "All rewritten values must have the same type"); + getAvailableVals(AV)[BB] = V; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { + assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + Value *Res = GetValueAtEndOfBlockInternal(BB); + assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + return Res; +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB. Consider code like this: +/// +/// X1 = ... +/// SomeBB: +/// use(X) +/// X2 = ... +/// br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks. However, the use of X happens in the *middle* of +/// a block. Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!getAvailableVals(AV).count(BB)) + return GetValueAtEndOfBlock(BB); + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + Value *SingularValue = 0; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = 0; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(PrototypeValue->getType()); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // Otherwise, we do need a PHI: insert one now. + PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), + &BB->front()); + InsertedPHI->reserveOperandSpace(PredValues.size()); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UseBB = User->getParent(); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + UseBB = UserPN->getIncomingBlock(U); + + U.set(GetValueInMiddleOfBlock(UseBB)); +} + + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +/// +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Query AvailableVals by doing an insertion of null. + std::pair<AvailableValsTy::iterator, bool> InsertRes = + AvailableVals.insert(std::make_pair(BB, WeakVH())); + + // Handle the case when the insertion fails because we have already seen BB. + if (!InsertRes.second) { + // If the insertion failed, there are two cases. The first case is that the + // value is already available for the specified block. If we get this, just + // return the value. + if (InsertRes.first->second != 0) + return InsertRes.first->second; + + // Otherwise, if the value we find is null, then this is the value is not + // known but it is being computed elsewhere in our recursion. This means + // that we have a cycle. Handle this by inserting a PHI node and returning + // it. When we get back to the first instance of the recursion we will fill + // in the PHI node. + return InsertRes.first->second = + PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), + &BB->front()); + } + + // Okay, the value isn't in the map and we just inserted a null in the entry + // to indicate that we're processing the block. Since we have no idea what + // value is in this block, we have to recurse through our predecessors. + // + // While we're walking our predecessors, we keep track of them in a vector, + // then insert a PHI node in the end if we actually need one. We could use a + // smallvector here, but that would take a lot of stack space for every level + // of the recursion, just use IncomingPredInfo as an explicit stack. + IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); + unsigned FirstPredInfoEntry = IncomingPredInfo.size(); + + // As we're walking the predecessors, keep track of whether they are all + // producing the same value. If so, this value will capture it, if not, it + // will get reset to null. We distinguish the no-predecessor case explicitly + // below. + TrackingVH<Value> SingularValue; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); + IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = 0; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); + IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + } + + // If there are no predecessors, then we must have found an unreachable block + // just return 'undef'. Since there are no predecessors, InsertRes must not + // be invalidated. + if (IncomingPredInfo.size() == FirstPredInfoEntry) + return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); + + /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If + /// this block is involved in a loop, a no-entry PHI node will have been + /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted + /// above. + TrackingVH<Value> &InsertedVal = AvailableVals[BB]; + + // If all the predecessor values are the same then we don't need to insert a + // PHI. This is the simple and common case. + if (SingularValue) { + // If a PHI node got inserted, replace it with the singlar value and delete + // it. + if (InsertedVal) { + PHINode *OldVal = cast<PHINode>(InsertedVal); + // Be careful about dead loops. These RAUW's also update InsertedVal. + if (InsertedVal != SingularValue) + OldVal->replaceAllUsesWith(SingularValue); + else + OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); + OldVal->eraseFromParent(); + } else { + InsertedVal = SingularValue; + } + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + return InsertedVal; + } + + // Otherwise, we do need a PHI: insert one now if we don't already have one. + if (InsertedVal == 0) + InsertedVal = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), &BB->front()); + + PHINode *InsertedPHI = cast<PHINode>(InsertedVal); + InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); + + // Fill in all the predecessors of the PHI. + for (IncomingPredInfoTy::iterator I = + IncomingPredInfo.begin()+FirstPredInfoEntry, + E = IncomingPredInfo.end(); I != E; ++I) + InsertedPHI->addIncoming(I->second, I->first); + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + InsertedPHI->replaceAllUsesWith(ConstVal); + InsertedPHI->eraseFromParent(); + InsertedVal = ConstVal; + } else { + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + } + + return InsertedVal; +} + + diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp index 4c4dd37..3bb2e8e 100644 --- a/lib/Transforms/Utils/SSI.cpp +++ b/lib/Transforms/Utils/SSI.cpp @@ -23,6 +23,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSI.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" using namespace llvm; @@ -30,11 +31,12 @@ using namespace llvm; static const std::string SSI_PHI = "SSI_phi"; static const std::string SSI_SIG = "SSI_sigma"; -static const unsigned UNSIGNED_INFINITE = ~0U; +STATISTIC(NumSigmaInserted, "Number of sigma functions inserted"); +STATISTIC(NumPhiInserted, "Number of phi functions inserted"); void SSI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominanceFrontier>(); - AU.addRequired<DominatorTree>(); + AU.addRequiredTransitive<DominanceFrontier>(); + AU.addRequiredTransitive<DominatorTree>(); AU.setPreservesAll(); } @@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) { /// This methods creates the SSI representation for the list of values /// received. It will only create SSI representation if a value is used -/// in a to decide a branch. Repeated values are created only once. +/// to decide a branch. Repeated values are created only once. /// void SSI::createSSI(SmallVectorImpl<Instruction *> &value) { init(value); - for (unsigned i = 0; i < num_values; ++i) { - if (created.insert(value[i])) { - needConstruction[i] = true; - } - } - insertSigmaFunctions(value); + SmallPtrSet<Instruction*, 4> needConstruction; + for (SmallVectorImpl<Instruction*>::iterator I = value.begin(), + E = value.end(); I != E; ++I) + if (created.insert(*I)) + needConstruction.insert(*I); + + insertSigmaFunctions(needConstruction); // Test if there is a need to transform to SSI - if (needConstruction.any()) { - insertPhiFunctions(value); - renameInit(value); + if (!needConstruction.empty()) { + insertPhiFunctions(needConstruction); + renameInit(needConstruction); rename(DT_->getRoot()); fixPhis(); } @@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl<Instruction *> &value) { /// Insert sigma functions (a sigma function is a phi function with one /// operator) /// -void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) { - for (unsigned i = 0; i < num_values; ++i) { - if (!needConstruction[i]) - continue; - - bool need = false; - for (Value::use_iterator begin = value[i]->use_begin(), end = - value[i]->use_end(); begin != end; ++begin) { +void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) { + for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), + E = value.end(); I != E; ++I) { + for (Value::use_iterator begin = (*I)->use_begin(), + end = (*I)->use_end(); begin != end; ++begin) { // Test if the Use of the Value is in a comparator - CmpInst *CI = dyn_cast<CmpInst>(begin); - if (CI && isUsedInTerminator(CI)) { - // Basic Block of the Instruction - BasicBlock *BB = CI->getParent(); - // Last Instruction of the Basic Block - const TerminatorInst *TI = BB->getTerminator(); - - for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) { - // Next Basic Block - BasicBlock *BB_next = TI->getSuccessor(j); - if (BB_next != BB && - BB_next->getUniquePredecessor() != NULL && - dominateAny(BB_next, value[i])) { - PHINode *PN = PHINode::Create( - value[i]->getType(), SSI_SIG, BB_next->begin()); - PN->addIncoming(value[i], BB); - sigmas.insert(std::make_pair(PN, i)); - created.insert(PN); - need = true; - defsites[i].push_back(BB_next); + if (CmpInst *CI = dyn_cast<CmpInst>(begin)) { + // Iterates through all uses of CmpInst + for (Value::use_iterator begin_ci = CI->use_begin(), + end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) { + // Test if any use of CmpInst is in a Terminator + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) { + insertSigma(TI, *I); } } } } - needConstruction[i] = need; + } +} + +/// Inserts Sigma Functions in every BasicBlock successor to Terminator +/// Instruction TI. All inserted Sigma Function are related to Instruction I. +/// +void SSI::insertSigma(TerminatorInst *TI, Instruction *I) { + // Basic Block of the Terminator Instruction + BasicBlock *BB = TI->getParent(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { + // Next Basic Block + BasicBlock *BB_next = TI->getSuccessor(i); + if (BB_next != BB && + BB_next->getSinglePredecessor() != NULL && + dominateAny(BB_next, I)) { + PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin()); + PN->addIncoming(I, BB); + sigmas[PN] = I; + created.insert(PN); + defsites[I].push_back(BB_next); + ++NumSigmaInserted; + } } } /// Insert phi functions when necessary /// -void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) { +void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) { DominanceFrontier *DF = &getAnalysis<DominanceFrontier>(); - for (unsigned i = 0; i < num_values; ++i) { + for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), + E = value.end(); I != E; ++I) { // Test if there were any sigmas for this variable - if (needConstruction[i]) { - - SmallPtrSet<BasicBlock *, 1> BB_visited; - - // Insert phi functions if there is any sigma function - while (!defsites[i].empty()) { - - BasicBlock *BB = defsites[i].back(); - - defsites[i].pop_back(); - DominanceFrontier::iterator DF_BB = DF->find(BB); - - // Iterates through all the dominance frontier of BB - for (std::set<BasicBlock *>::iterator DF_BB_begin = - DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); - DF_BB_begin != DF_BB_end; ++DF_BB_begin) { - BasicBlock *BB_dominated = *DF_BB_begin; - - // Test if has not yet visited this node and if the - // original definition dominates this node - if (BB_visited.insert(BB_dominated) && - DT_->properlyDominates(value_original[i], BB_dominated) && - dominateAny(BB_dominated, value[i])) { - PHINode *PN = PHINode::Create( - value[i]->getType(), SSI_PHI, BB_dominated->begin()); - phis.insert(std::make_pair(PN, i)); - created.insert(PN); - - defsites[i].push_back(BB_dominated); - } + SmallPtrSet<BasicBlock *, 16> BB_visited; + + // Insert phi functions if there is any sigma function + while (!defsites[*I].empty()) { + + BasicBlock *BB = defsites[*I].back(); + + defsites[*I].pop_back(); + DominanceFrontier::iterator DF_BB = DF->find(BB); + + // The BB is unreachable. Skip it. + if (DF_BB == DF->end()) + continue; + + // Iterates through all the dominance frontier of BB + for (std::set<BasicBlock *>::iterator DF_BB_begin = + DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); + DF_BB_begin != DF_BB_end; ++DF_BB_begin) { + BasicBlock *BB_dominated = *DF_BB_begin; + + // Test if has not yet visited this node and if the + // original definition dominates this node + if (BB_visited.insert(BB_dominated) && + DT_->properlyDominates(value_original[*I], BB_dominated) && + dominateAny(BB_dominated, *I)) { + PHINode *PN = PHINode::Create( + (*I)->getType(), SSI_PHI, BB_dominated->begin()); + phis.insert(std::make_pair(PN, *I)); + created.insert(PN); + + defsites[*I].push_back(BB_dominated); + ++NumPhiInserted; } } - BB_visited.clear(); } + BB_visited.clear(); } } /// Some initialization for the rename part /// -void SSI::renameInit(SmallVectorImpl<Instruction *> &value) { - value_stack.resize(num_values); - for (unsigned i = 0; i < num_values; ++i) { - value_stack[i].push_back(value[i]); - } +void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) { + for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), + E = value.end(); I != E; ++I) + value_stack[*I].push_back(*I); } /// Renames all variables in the specified BasicBlock. /// Only variables that need to be rename will be. /// void SSI::rename(BasicBlock *BB) { - BitVector *defined = new BitVector(num_values, false); + SmallPtrSet<Instruction*, 8> defined; // Iterate through instructions and make appropriate renaming. // For SSI_PHI (b = PHI()), store b at value_stack as a new @@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) { begin != end; ++begin) { Instruction *I = begin; if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions - int position; + Instruction* position; // Treat SSI_PHI - if ((position = getPositionPhi(PN)) != -1) { + if ((position = getPositionPhi(PN))) { value_stack[position].push_back(PN); - (*defined)[position] = true; - } - + defined.insert(position); // Treat SSI_SIG - else if ((position = getPositionSigma(PN)) != -1) { + } else if ((position = getPositionSigma(PN))) { substituteUse(I); value_stack[position].push_back(PN); - (*defined)[position] = true; + defined.insert(position); } // Treat all other PHI functions @@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) { for (BasicBlock::iterator begin = BB_succ->begin(), notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { Instruction *I = begin; - PHINode *PN; - int position; - if ((PN = dyn_cast<PHINode>(I)) && ((position - = getPositionPhi(PN)) != -1)) { + PHINode *PN = dyn_cast<PHINode>(I); + Instruction* position; + if (PN && ((position = getPositionPhi(PN)))) { PN->addIncoming(value_stack[position].back(), BB); } } @@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) { // Now we remove all inserted definitions of a variable from the top of // the stack leaving the previous one as the top. - if (defined->any()) { - for (unsigned i = 0; i < num_values; ++i) { - if ((*defined)[i]) { - value_stack[i].pop_back(); - } - } - } + for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), + DE = defined.end(); DI != DE; ++DI) + value_stack[*DI].pop_back(); } /// Substitute any use in this instruction for the last definition of @@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) { void SSI::substituteUse(Instruction *I) { for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { Value *operand = I->getOperand(i); - for (unsigned j = 0; j < num_values; ++j) { - if (operand == value_stack[j].front() && - I != value_stack[j].back()) { + for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator + VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) { + if (operand == VI->second.front() && + I != VI->second.back()) { PHINode *PN_I = dyn_cast<PHINode>(I); - PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back()); + PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back()); // If a phi created in a BasicBlock is used as an operand of another // created in the same BasicBlock, this step marks this second phi, // to fix this issue later. It cannot be fixed now, because the // operands of the first phi are not final yet. if (PN_I && PN_vs && - value_stack[j].back()->getParent() == I->getParent()) { + VI->second.back()->getParent() == I->getParent()) { phisToFix.insert(PN_I); } - I->setOperand(i, value_stack[j].back()); + I->setOperand(i, VI->second.back()); break; } } @@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) { } /// Test if the BasicBlock BB dominates any use or definition of value. +/// If it dominates a phi instruction that is on the same BasicBlock, +/// that does not count. /// bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { for (Value::use_iterator begin = value->use_begin(), end = value->use_end(); begin != end; ++begin) { Instruction *I = cast<Instruction>(*begin); BasicBlock *BB_father = I->getParent(); + if (BB == BB_father && isa<PHINode>(I)) + continue; if (DT_->dominates(BB, BB_father)) { return true; } @@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { /// as an operand of another phi function used in the same BasicBlock, /// LLVM looks this as an error. So on the second phi, the first phi is called /// P and the BasicBlock it incomes is B. This P will be replaced by the value -/// it has for BasicBlock B. +/// it has for BasicBlock B. It also includes undef values for predecessors +/// that were not included in the phi. /// void SSI::fixPhis() { for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(), end = phisToFix.end(); begin != end; ++begin) { PHINode *PN = *begin; for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { - PHINode *PN_father; - if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) && - PN->getParent() == PN_father->getParent()) { + PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i)); + if (PN_father && PN->getParent() == PN_father->getParent() && + !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) { BasicBlock *BB = PN->getIncomingBlock(i); int pos = PN_father->getBasicBlockIndex(BB); PN->setIncomingValue(i, PN_father->getIncomingValue(pos)); } } } + + for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(), + end = phis.end(); begin != end; ++begin) { + PHINode *PN = begin->first; + BasicBlock *BB = PN->getParent(); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + SmallVector<BasicBlock*, 8> Preds(PI, PE); + for (unsigned size = Preds.size(); + PI != PE && PN->getNumIncomingValues() != size; ++PI) { + bool found = false; + for (unsigned i = 0, pn_end = PN->getNumIncomingValues(); + i < pn_end; ++i) { + if (PN->getIncomingBlock(i) == *PI) { + found = true; + break; + } + } + if (!found) { + PN->addIncoming(UndefValue::get(PN->getType()), *PI); + } + } + } } /// Return which variable (position on the vector of variables) this phi /// represents on the phis list. /// -unsigned SSI::getPositionPhi(PHINode *PN) { - DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN); +Instruction* SSI::getPositionPhi(PHINode *PN) { + DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN); if (val == phis.end()) - return UNSIGNED_INFINITE; + return 0; else return val->second; } @@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) { /// Return which variable (position on the vector of variables) this phi /// represents on the sigmas list. /// -unsigned SSI::getPositionSigma(PHINode *PN) { - DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN); +Instruction* SSI::getPositionSigma(PHINode *PN) { + DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN); if (val == sigmas.end()) - return UNSIGNED_INFINITE; + return 0; else return val->second; } -/// Return true if the the Comparison Instruction is an operator -/// of the Terminator instruction of its Basic Block. -/// -unsigned SSI::isUsedInTerminator(CmpInst *CI) { - TerminatorInst *TI = CI->getParent()->getTerminator(); - if (TI->getNumOperands() == 0) { - return false; - } else if (CI == TI->getOperand(0)) { - return true; - } else { - return false; - } -} - /// Initializes /// void SSI::init(SmallVectorImpl<Instruction *> &value) { - num_values = value.size(); - needConstruction.resize(num_values, false); - - value_original.resize(num_values); - defsites.resize(num_values); - - for (unsigned i = 0; i < num_values; ++i) { - value_original[i] = value[i]->getParent(); - defsites[i].push_back(value_original[i]); + for (SmallVectorImpl<Instruction *>::iterator I = value.begin(), + E = value.end(); I != E; ++I) { + value_original[*I] = (*I)->getParent(); + defsites[*I].push_back((*I)->getParent()); } } /// Clean all used resources in this creation of SSI /// void SSI::clean() { - for (unsigned i = 0; i < num_values; ++i) { - defsites[i].clear(); - if (i < value_stack.size()) - value_stack[i].clear(); - } - phis.clear(); sigmas.clear(); phisToFix.clear(); @@ -378,7 +384,6 @@ void SSI::clean() { defsites.clear(); value_stack.clear(); value_original.clear(); - needConstruction.clear(); } /// createSSIPass - The public interface to this file... @@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); } char SSI::ID = 0; static RegisterPass<SSI> X("ssi", "Static Single Information Construction"); +/// SSIEverything - A pass that runs createSSI on every non-void variable, +/// intended for debugging. +namespace { + struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + SSIEverything() : FunctionPass(&ID) {} + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<SSI>(); + } + }; +} + +bool SSIEverything::runOnFunction(Function &F) { + SmallVector<Instruction *, 16> Insts; + SSI &ssi = getAnalysis<SSI>(); + + if (F.isDeclaration() || F.isIntrinsic()) return false; + + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) + for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) + if (I->getType() != Type::getVoidTy(F.getContext())) + Insts.push_back(I); + + ssi.createSSI(Insts); + return true; +} + +/// createSSIEverythingPass - The public interface to this file... +/// +FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); } + +char SSIEverything::ID = 0; +static RegisterPass<SSIEverything> +Y("ssi-everything", "Static Single Information Construction"); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 58d4d5a..6fd7d7b 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -21,6 +21,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/SmallVector.h" @@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DOUT << "Looking to fold " << BB->getNameStart() << " into " - << Succ->getNameStart() << "\n"; + DEBUG(errs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe if (Succ->getSinglePredecessor()) return true; - typedef SmallPtrSet<Instruction*, 16> InstrSet; - InstrSet BBPHIs; - - // Make a list of all phi nodes in BB - BasicBlock::iterator BBI = BB->begin(); - while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++); - // Make a list of the predecessors of BB typedef SmallPtrSet<BasicBlock*, 16> BlockSet; BlockSet BBPreds(pred_begin(BB), pred_end(BB)); @@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { PI != PE; PI++) { if (BBPN->getIncomingValueForBlock(*PI) != PN->getIncomingValueForBlock(*PI)) { - DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " - << Succ->getNameStart() << " is conflicting with " - << BBPN->getNameStart() << " with regard to common predecessor " - << (*PI)->getNameStart() << "\n"; + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << (*PI)->getName() << "\n"); return false; } } - // Remove this phinode from the list of phis in BB, since it has been - // handled. - BBPHIs.erase(BBPN); } else { Value* Val = PN->getIncomingValueForBlock(BB); for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); @@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. if (Val != PN->getIncomingValueForBlock(*PI)) { - DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " - << Succ->getNameStart() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getNameStart() << "\n"; + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with regard to common " + << "predecessor " << (*PI)->getName() << "\n"); return false; } } } } - // If there are any other phi nodes in BB that don't have a phi node in Succ - // to merge with, they must be moved to Succ completely. However, for any - // predecessors of Succ, branches will be added to the phi node that just - // point to itself. So, for any common predecessors, this must not cause - // conflicts. - for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end(); - I != E; I++) { - PHINode *PN = cast<PHINode>(*I); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) - if (PN->getIncomingValueForBlock(*PI) != PN) { - DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " - << BB->getNameStart() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getNameStart() << "\n"; - return false; - } - } - return true; } @@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, // Check to see if merging these blocks would cause conflicts for any of the // phi nodes in BB or Succ. If not, we can safely merge. if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; - - DOUT << "Killing Trivial BB: \n" << *BB; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking whether + // BB dominates Succ, which is non-trivial to calculate in the case where + // Succ has multiple predecessors. Also, it requires checking whether + // constructing the necessary self-referential PHI node doesn't intoduce any + // conflicts; this isn't too difficult, but the previous code for doing this + // was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa<PHINode>(*BBI)) { + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + if (PHINode* PN = dyn_cast<PHINode>(*UI)) { + if (PN->getIncomingBlock(UI) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } + + DEBUG(errs() << "Killing Trivial BB: \n" << *BB); if (isa<PHINode>(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in @@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, } } - if (isa<PHINode>(&BB->front())) { - SmallVector<BasicBlock*, 16> - OldSuccPreds(pred_begin(Succ), pred_end(Succ)); - - // Move all PHI nodes in BB to Succ if they are alive, otherwise - // delete them. - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - if (PN->use_empty()) { - // Just remove the dead phi. This happens if Succ's PHIs were the only - // users of the PHI nodes. - PN->eraseFromParent(); - continue; - } - - // The instruction is alive, so this means that BB must dominate all - // predecessors of Succ (Since all uses of the PN are after its - // definition, so in Succ or a block dominated by Succ. If a predecessor - // of Succ would not be dominated by BB, PN would violate the def before - // use SSA demand). Therefore, we can simply move the phi node to the - // next block. + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. Succ->getInstList().splice(Succ->begin(), BB->getInstList(), BB->begin()); - - // We need to add new entries for the PHI node to account for - // predecessors of Succ that the PHI node does not take into - // account. At this point, since we know that BB dominated succ and all - // of its predecessors, this means that we should any newly added - // incoming edges should use the PHI node itself as the value for these - // edges, because they are loop back edges. - for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i) - if (OldSuccPreds[i] != BB) - PN->addIncoming(PN, OldSuccPreds[i]); + } else { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); } } @@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if its a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. + if (!I->isSafeToSpeculativelyExecute()) + return false; + switch (I->getOpcode()) { default: return false; // Cannot hoist this out safely. case Instruction::Load: { - // We can hoist loads that are non-volatile and obviously cannot trap. - if (cast<LoadInst>(I)->isVolatile()) - return false; - // FIXME: A computation of a constant can trap! - if (!isa<AllocaInst>(I->getOperand(0)) && - !isa<Constant>(I->getOperand(0))) - return false; - // External weak globals may have address 0, so we can't load them. - Value *V2 = I->getOperand(0)->getUnderlyingObject(); - if (V2) { - GlobalVariable* GV = dyn_cast<GlobalVariable>(V2); - if (GV && GV->hasExternalWeakLinkage()) - return false; - } - // Finally, we have to check to make sure there are no instructions - // before the load in its basic block, as we are going to hoist the loop - // out to its predecessor. + // We have to check to make sure there are no instructions before the + // load in its basic block, as we are going to hoist the loop out to + // its predecessor. BasicBlock::iterator IP = PBB->begin(); while (isa<DbgInfoIntrinsic>(IP)) IP++; @@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisCases.size() == 1 && "Branch can only have one case!"); // Insert the new branch. Instruction *NI = BranchInst::Create(ThisDef, TI); + (void) NI; // Remove PHI node entries for the dead edge. ThisCases[0].second->removePredecessor(TI->getParent()); - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].first); - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); for (unsigned i = SI->getNumCases()-1; i != 0; --i) if (DeadCases.count(SI->getCaseValue(i))) { @@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SI->removeCase(i); } - DOUT << "Leaving: " << *TI << "\n"; + DEBUG(errs() << "Leaving: " << *TI << "\n"); return true; } } @@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Insert the new branch. Instruction *NI = BranchInst::Create(TheRealDest, TI); + (void) NI; - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { if (InfLoopBlock == 0) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); } NewSI->setSuccessor(i, InfLoopBlock); @@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - !I1->isIdenticalTo(I2) || + !I1->isIdenticalToWhenDefined(I2) || (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; @@ -919,6 +890,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { BIParent->getInstList().splice(BI, BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); BB2->getInstList().erase(I2); I1 = BB1_Itr++; @@ -927,7 +899,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2 = BB2_Itr++; while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2)); + } while (I1->getOpcode() == I2->getOpcode() && + I1->isIdenticalToWhenDefined(I2)); return true; @@ -939,7 +912,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); - if (NT->getType() != Type::VoidTy) { + if (NT->getType() != Type::getVoidTy(BB1->getContext())) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); NT->takeName(I1); @@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType() == Type::Int1Ty) { + CB->getType() == Type::getInt1Ty(BB->getContext())) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1209,7 +1182,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting // the edge we are about to create. - BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge", + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); PHINode *PN; @@ -1242,7 +1216,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { } // Check for trivial simplification. - if (Constant *C = ConstantFoldInstruction(N)) { + if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) { TranslateMap[BBI] = C; delete N; // Constant folded away, don't need actual inst } else { @@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) { if (NumPhis > 2) return false; - DOUT << "FOUND IF CONDITION! " << *IfCond << " T: " - << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"; + DEBUG(errs() << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions @@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { if (FalseRet->getNumOperands() == 0) { TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - ReturnInst::Create(0, BI); + ReturnInst::Create(BI->getContext(), 0, BI); EraseTerminatorInstAndDCECond(BI); return true; } @@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { } Value *RI = !TrueValue ? - ReturnInst::Create(BI) : - ReturnInst::Create(TrueValue, BI); + ReturnInst::Create(BI->getContext(), BI) : + ReturnInst::Create(BI->getContext(), TrueValue, BI); + (void) RI; - DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" - << "\n " << *BI << "NewRet = " << *RI - << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc; + DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); EraseTerminatorInstAndDCECond(BI); @@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { else continue; - DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB; + DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { @@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); - + // If this block ends with a branch instruction, and if there is a // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. @@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue)); + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + CondIsTrue)); return true; // Nuke the branch on constant. } @@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { - PHINode *NewPN = PHINode::Create(Type::Int1Ty, + PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), BI->getCondition()->getName() + ".pr", BB->begin()); // Okay, we're going to insert the PHI node. Since PBI is not the only @@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), *PI); } else { NewPN->addIncoming(BI->getCondition(), *PI); @@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - DOUT << "FOLDING BRs:" << *PBI->getParent() - << "AND: " << *BI->getParent(); + DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); // If OtherDest *is* BB, then BB is a basic block with a single conditional @@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (OtherDest == BB) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) - BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; } - DOUT << *PBI->getParent()->getParent(); + DEBUG(errs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. @@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } - DOUT << "INTO: " << *PBI->getParent(); - - DOUT << *PBI->getParent()->getParent(); + DEBUG(errs() << "INTO: " << *PBI->getParent()); + DEBUG(errs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. @@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { // Remove basic blocks that have no predecessors... or that just have themself // as a predecessor. These are unreachable. if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { - DOUT << "Removing BB: \n" << *BB; + DEBUG(errs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; } @@ -1832,8 +1808,8 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (!UncondBranchPreds.empty()) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DOUT << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred; + DEBUG(errs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); @@ -1884,33 +1860,26 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { } else if (isa<UnwindInst>(BB->begin())) { // Check to see if the first instruction in this block is just an unwind. // If so, replace any invoke instructions which use this as an exception - // destination with call instructions, and any unconditional branch - // predecessor with an unwind. + // destination with call instructions. // SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); while (!Preds.empty()) { BasicBlock *Pred = Preds.back(); - if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) { - if (BI->isUnconditional()) { - Pred->getInstList().pop_back(); // nuke uncond branch - new UnwindInst(Pred); // Use unwind. - Changed = true; - } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) + if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) if (II->getUnwindDest() == BB) { // Insert a new branch instruction before the invoke, because this - // is now a fall through... + // is now a fall through. BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); Pred->getInstList().remove(II); // Take out of symbol table - // Insert the call now... + // Insert the call now. SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end()); CallInst *CI = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), II->getName(), BI); CI->setCallingConv(II->getCallingConv()); CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call now does instead + // If the invoke produced a value, the Call now does instead. II->replaceAllUsesWith(CI); delete II; Changed = true; @@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isUnconditional()) { if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI); + new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 848f2b8..30cb94d 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } else if (UnwindingBlocks.size() == 1) { UnwindBlock = UnwindingBlocks.front(); } else { - UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F); - new UnwindInst(UnwindBlock); + UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); + new UnwindInst(F.getContext(), UnwindBlock); for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), E = UnwindingBlocks.end(); I != E; ++I) { @@ -83,8 +83,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { - UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F); - new UnreachableInst(UnreachableBlock); + UnreachableBlock = BasicBlock::Create(F.getContext(), + "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), E = UnreachableBlocks.end(); I != E; ++I) { @@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. // - BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F); + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); PHINode *PN = 0; - if (F.getReturnType() == Type::VoidTy) { - ReturnInst::Create(NULL, NewRetBlock); + if (F.getReturnType() == Type::getVoidTy(F.getContext())) { + ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); - ReturnInst::Create(PN, NewRetBlock); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); } // Loop over all of the blocks, replacing the return instruction with an diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp index caef7ec..4d838b5 100644 --- a/lib/Transforms/Utils/UnrollLoop.cpp +++ b/lib/Transforms/Utils/UnrollLoop.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return 0; - DOUT << "Merging: " << *BB << "into: " << *OnlyPred; + DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred); // Resolve any PHI nodes at the start of the block. They are all // guaranteed to have exactly one entry if they exist, unless there are @@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. - DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; + DEBUG(errs() << + " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } @@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) - DOUT << " Trip Count = " << TripCount << "\n"; + DEBUG(errs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) - DOUT << " Trip Multiple = " << TripMultiple << "\n"; + DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. @@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) } if (CompletelyUnroll) { - DOUT << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << TripCount << "!\n"; + DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); } else { - DOUT << "UNROLLING loop %" << Header->getName() - << " by " << Count; + DEBUG(errs() << "UNROLLING loop %" << Header->getName() + << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { - DOUT << " with a breakout at trip " << BreakoutTrip; + DEBUG(errs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { - DOUT << " with " << TripMultiple << " trips per branch"; + DEBUG(errs() << " with " << TripMultiple << " trips per branch"); } - DOUT << "!\n"; + DEBUG(errs() << "!\n"); } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); @@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Constant *C = ConstantFoldInstruction(Inst)) { + else if (Constant *C = ConstantFoldInstruction(Inst, + Header->getContext())) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 20b676d..2d8332f 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,23 +13,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" // For getNullValue(Type::Int32Ty) #include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" -#include "llvm/MDNode.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) { Value *&VMSlot = VM[V]; if (VMSlot) return VMSlot; // Does it exist in the map yet? // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the // DenseMap. This includes any recursive calls to MapValue. - // Global values do not need to be seeded into the ValueMap if they are using - // the identity mapping. - if (isa<GlobalValue>(V) || isa<InlineAsm>(V)) + // Global values and metadata do not need to be seeded into the ValueMap if + // they are using the identity mapping. + if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V)) return VMSlot = const_cast<Value*>(V); if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { @@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This array must contain a reference to a global, make a new array // and return it. @@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantArray::get(CA->getType(), Values); } } @@ -60,7 +64,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This struct must contain a reference to a global, make a new struct // and return it. @@ -71,7 +75,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantStruct::get(CS->getType(), Values); } } @@ -80,12 +84,12 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { std::vector<Constant*> Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) - Ops.push_back(cast<Constant>(MapValue(*i, VM))); + Ops.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = CE->getWithOperands(Ops); } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This vector value must contain a reference to a global, make a new // vector constant and return it. @@ -96,38 +100,16 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantVector::get(Values); } } return VM[V] = C; - } else if (MDNode *N = dyn_cast<MDNode>(C)) { - for (MDNode::const_elem_iterator b = N->elem_begin(), i = b, - e = N->elem_end(); i != e; ++i) { - if (!*i) continue; - - Value *MV = MapValue(*i, VM); - if (MV != *i) { - // This MDNode must contain a reference to a global, make a new MDNode - // and return it. - SmallVector<Value*, 8> Values; - Values.reserve(N->getNumElements()); - for (MDNode::const_elem_iterator j = b; j != i; ++j) - Values.push_back(*j); - Values.push_back(MV); - for (++i; i != e; ++i) - Values.push_back(MapValue(*i, VM)); - return VM[V] = MDNode::get(Values.data(), Values.size()); - } - } - return VM[V] = C; - } else { - assert(0 && "Unknown type of constant!"); + llvm_unreachable("Unknown type of constant!"); } } - return 0; } @@ -136,7 +118,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { /// void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, ValueMap); + Value *V = MapValue(*op, ValueMap, I->getParent()->getContext()); assert(V && "Referenced value not in value map!"); *op = V; } |