Diffstat (limited to 'contrib/llvm/lib/Transforms')
64 files changed, 1418 insertions, 1111 deletions
diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.cpp b/contrib/llvm/lib/Transforms/Hello/Hello.cpp index 37d7a00..abfa514 100644 --- a/contrib/llvm/lib/Transforms/Hello/Hello.cpp +++ b/contrib/llvm/lib/Transforms/Hello/Hello.cpp @@ -28,7 +28,7 @@ namespace { Hello() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; @@ -46,7 +46,7 @@ namespace { Hello2() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.exports b/contrib/llvm/lib/Transforms/Hello/Hello.exports new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Hello/Hello.exports diff --git a/contrib/llvm/lib/Transforms/Hello/Makefile b/contrib/llvm/lib/Transforms/Hello/Makefile index c5e75d4..f1e3148 100644 --- a/contrib/llvm/lib/Transforms/Hello/Makefile +++ b/contrib/llvm/lib/Transforms/Hello/Makefile @@ -12,5 +12,13 @@ LIBRARYNAME = LLVMHello LOADABLE_MODULE = 1 USEDLIBS = +# If we don't need RTTI or EH, there's no reason to export anything +# from the hello plugin. +ifneq ($(REQUIRES_RTTI), 1) +ifneq ($(REQUIRES_EH), 1) +EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports +endif +endif + include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 89f213e..28ea079 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -360,19 +360,20 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { + User *U = *UI; Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); - // TODO: This runs the above loop over and over again for dead GEPS + // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); @@ -452,12 +453,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. 
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > - I = idf_ext_begin(*PI, TranspBlocks), - E = idf_ext_end(*PI, TranspBlocks); I != E; ++I) + I = idf_ext_begin(P, TranspBlocks), + E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Arg, LoadSize)) return false; + } } // If the path from the entry of the function to each load is free of diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 692e47d..475eee8 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -120,9 +120,14 @@ namespace { typedef SmallVector<RetOrArg, 5> UseVector; + protected: + // DAH uses this to specify a different ID. + explicit DAE(void *ID) : ModulePass(ID) {} + public: static char ID; // Pass identification, replacement for typeid DAE() : ModulePass(&ID) {} + bool runOnModule(Module &M); virtual bool ShouldHackArguments() const { return false; } @@ -155,6 +160,8 @@ namespace { /// by bugpoint. struct DAH : public DAE { static char ID; + DAH() : DAE(&ID) {} + virtual bool ShouldHackArguments() const { return true; } }; } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b429213..735a1c4 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -160,13 +160,12 @@ static bool SafeToDestroyConstant(const Constant *C) { static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode*, 16> &PHIUsers) { for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; - if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) { + } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) @@ -221,18 +220,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { + // Nothing to analyse. } else if (isa<MemTransferInst>(I)) { - if (I->getOperand(1) == V) + const MemTransferInst *MTI = cast<MemTransferInst>(I); + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; } else if (isa<MemSetInst>(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + assert(cast<MemSetInst>(I)->getArgOperand(0) == V && + "Memset only takes one pointer!"); GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(*UI)) { + } else if (const Constant *C = dyn_cast<Constant>(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. 
if (!SafeToDestroyConstant(C)) @@ -242,6 +244,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // Otherwise must be some other user. return true; } + } return false; } @@ -1304,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, + NElems, 0, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1323,8 +1326,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F2) { free(F2); F2 = 0; } // } // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], @@ -1511,10 +1514,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) AllocTy = AT->getElementType(); - + const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); if (!AllocSTy) return false; @@ -1533,7 +1536,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - CI->getName()); + 0, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1597,13 +1600,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GVElType->isFloatingPointTy() || GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) return false; - + } + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. @@ -1641,7 +1646,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // bool. Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. 
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { @@ -2260,8 +2265,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { @@ -2302,7 +2306,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (!Callee) return false; // Cannot resolve. SmallVector<Constant*, 8> Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + CallSite CS(CI); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index df2456f..e4db235 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -85,15 +85,16 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { unsigned NumNonconstant = 0; for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + User *U = *UI; // Ignore blockaddress uses. - if (isa<BlockAddress>(*UI)) continue; + if (isa<BlockAddress>(U)) continue; // Used by a non-instruction, or not the callee of a function, do not // transform. - if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI)) + if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) return false; - CallSite CS = CallSite::get(cast<Instruction>(*UI)); + CallSite CS = CallSite::get(cast<Instruction>(U)); if (!CS.isCallee(UI)) return false; diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index b785bb0..9bb01f5 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -399,7 +399,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; - // If this call sites was obtained by inlining another function, verify + // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlinling the same function, // which would provide the same callsites, which would cause us to @@ -468,7 +468,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. if (SCC.isSingular()) { - std::swap(CallSites[CSi], CallSites.back()); + CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { CallSites.erase(CallSites.begin()+CSi); diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp index 4d61e83..76cfef8 100644 --- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp @@ -42,6 +42,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Transforms/Utils/Local.h" @@ -262,8 +263,8 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) // char*. 
It returns "void", so it doesn't need to replace any of // Inst's uses and doesn't get a name. CastInst* CI = - new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); - Value *Args[] = { CI, Inst->getOperand(2) }; + new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); + Value *Args[] = { CI, Inst->getArgOperand(1) }; CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; @@ -378,7 +379,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = - new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst); Value *Args[] = { GetSetJmpMap(Func), BufPtr, ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) @@ -405,12 +406,14 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // Loop over all of the uses of instruction. If any of them are after the // call, "spill" the value to the stack. for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != ABlock || - InstrsAfterCall.count(cast<Instruction>(*UI))) { + UI != E; ++UI) { + User *U = *UI; + if (cast<Instruction>(U)->getParent() != ABlock || + InstrsAfterCall.count(cast<Instruction>(U))) { DemoteRegToStack(*II); break; } + } InstrsAfterCall.clear(); // Change the setjmp call into a branch statement. We'll remove the @@ -473,7 +476,8 @@ void LowerSetJmp::visitCallInst(CallInst& CI) // Construct the new "invoke" instruction. TerminatorInst* Term = OldBB->getTerminator(); - std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end()); + CallSite CS(&CI); + std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], Params.begin(), Params.end(), CI.getName(), Term); diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 622a9b5..aeeafe7 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -146,7 +146,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { switch(Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); - // Fall through in Release-Asserts mode. + // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: // Ty1 == Ty2 would have returned true earlier. @@ -535,6 +535,7 @@ static LinkageCategory categorize(const Function *F) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::ExternalWeakLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: return ExternalWeak; case GlobalValue::ExternalLinkage: @@ -602,6 +603,10 @@ static void ThunkGToF(Function *F, Function *G) { } static void AliasGToF(Function *F, Function *G) { + // Darwin will trigger llvm_unreachable if asked to codegen an alias. 
+ return ThunkGToF(F, G); + +#if 0 if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) return ThunkGToF(F, G); @@ -613,6 +618,7 @@ static void AliasGToF(Function *F, Function *G) { GA->setVisibility(G->getVisibility()); G->replaceAllUsesWith(GA); G->eraseFromParent(); +#endif } static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index 07525ea..6b9814c 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -66,13 +66,13 @@ Function* PartialInliner::unswitchFunction(Function* F) { return 0; // Clone the function, so that we can hack away on it. - DenseMap<const Value*, Value*> ValueMap; - Function* duplicateFunction = CloneFunction(F, ValueMap); + ValueMap<const Value*, Value*> VMap; + Function* duplicateFunction = CloneFunction(F, VMap); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); - BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]); - BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]); - BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]); + BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp index 084b94e..58e1448 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp @@ -32,6 +32,10 @@ using namespace llvm; STATISTIC(numSpecialized, "Number of specialized functions created"); +STATISTIC(numReplaced, "Number of callers replaced by specialization"); + +// Maximum number of arguments markable interested +static const int MaxInterests = 6; // Call must be used at least occasionally static const int CallsMin = 5; @@ -40,8 +44,9 @@ static const int CallsMin = 5; static const double ConstValPercent = .1; namespace { + typedef SmallVector<int, MaxInterests> InterestingArgVector; class PartSpec : public ModulePass { - void scanForInterest(Function&, SmallVector<int, 6>&); + void scanForInterest(Function&, InterestingArgVector&); int scanDistribution(Function&, int, std::map<Constant*, int>&); public : static char ID; // Pass identification, replacement for typeid @@ -59,13 +64,15 @@ X("partialspecialization", "Partial Specialization"); // a call to the specialized function. 
Returns the specialized function static Function* SpecializeFunction(Function* F, - DenseMap<const Value*, Value*>& replacements) { + ValueMap<const Value*, Value*>& replacements) { // arg numbers of deleted arguments - DenseSet<unsigned> deleted; - for (DenseMap<const Value*, Value*>::iterator + DenseMap<unsigned, const Argument*> deleted; + for (ValueMap<const Value*, Value*>::iterator repb = replacements.begin(), repe = replacements.end(); - repb != repe; ++repb) - deleted.insert(cast<Argument>(repb->first)->getArgNo()); + repb != repe; ++repb) { + Argument const *arg = cast<const Argument>(repb->first); + deleted[arg->getArgNo()] = arg; + } Function* NF = CloneFunction(F, replacements); NF->setLinkage(GlobalValue::InternalLinkage); @@ -80,9 +87,23 @@ SpecializeFunction(Function* F, if (CS.getCalledFunction() == F) { SmallVector<Value*, 6> args; - for (unsigned x = 0; x < CS.arg_size(); ++x) - if (!deleted.count(x)) - args.push_back(CS.getArgument(x)); + // Assemble the non-specialized arguments for the updated callsite. + // In the process, make sure that the specialized arguments are + // constant and match the specialization. If that's not the case, + // this callsite needs to call the original or some other + // specialization; don't change it here. + CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end(); + for (CallSite::arg_iterator ai = as; ai != ae; ++ai) { + DenseMap<unsigned, const Argument*>::iterator delit = deleted.find( + std::distance(as, ai)); + if (delit == deleted.end()) + args.push_back(cast<Value>(ai)); + else { + Constant *ci = dyn_cast<Constant>(ai); + if (!(ci && ci == replacements[delit->second])) + goto next_use; + } + } Value* NCall; if (CallInst *CI = dyn_cast<CallInst>(i)) { NCall = CallInst::Create(NF, args.begin(), args.end(), @@ -99,8 +120,11 @@ SpecializeFunction(Function* F, } CS.getInstruction()->replaceAllUsesWith(NCall); CS.getInstruction()->eraseFromParent(); + ++numReplaced; } } + next_use: + ; } return NF; } @@ -111,7 +135,7 @@ bool PartSpec::runOnModule(Module &M) { for (Module::iterator I = M.begin(); I != M.end(); ++I) { Function &F = *I; if (F.isDeclaration() || F.mayBeOverridden()) continue; - SmallVector<int, 6> interestingArgs; + InterestingArgVector interestingArgs; scanForInterest(F, interestingArgs); // Find the first interesting Argument that we can specialize on @@ -126,7 +150,7 @@ bool PartSpec::runOnModule(Module &M) { ee = distribution.end(); ii != ee; ++ii) if (total > ii->second && ii->first && ii->second > total * ConstValPercent) { - DenseMap<const Value*, Value*> m; + ValueMap<const Value*, Value*> m; Function::arg_iterator arg = F.arg_begin(); for (int y = 0; y < interestingArgs[x]; ++y) ++arg; @@ -143,7 +167,7 @@ bool PartSpec::runOnModule(Module &M) { /// scanForInterest - This function decides which arguments would be worth /// specializing on. 
-void PartSpec::scanForInterest(Function& F, SmallVector<int, 6>& args) { +void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); ii != ee; ++ii) { for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 6bc8e66..12e8db8 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -73,6 +73,19 @@ namespace { AU.setPreservesAll(); } }; + + class StripDeadDebugInfo : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDeadDebugInfo() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; } char StripSymbols::ID = 0; @@ -99,6 +112,14 @@ ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); } +char StripDeadDebugInfo::ID = 0; +static RegisterPass<StripDeadDebugInfo> +A("strip-dead-debug-info", "Strip debug info for unused symbols"); + +ModulePass *llvm::createStripDeadDebugInfoPass() { + return new StripDeadDebugInfo(); +} + /// OnlyUsedBy - Return true if V is only used by Usr. static bool OnlyUsedBy(Value *V, Value *Usr) { for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { @@ -223,27 +244,27 @@ static bool StripDebugInfo(Module &M) { Changed = true; } - NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); - } - - NMD = M.getNamedMetadata("llvm.dbg.lv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); + for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), + NME = M.named_metadata_end(); NMI != NME;) { + NamedMDNode *NMD = NMI; + ++NMI; + if (NMD->getName().startswith("llvm.dbg.")) { + NMD->eraseFromParent(); + Changed = true; + } } - + unsigned MDDbgKind = M.getMDKindID("dbg"); - for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) + ++BI) { + Changed = true; // FIXME: Only set if there was debug metadata. BI->setMetadata(MDDbgKind, 0); + } - return true; + return Changed; } bool StripSymbols::runOnModule(Module &M) { @@ -266,8 +287,8 @@ bool StripDebugDeclare::runOnModule(Module &M) { if (Declare) { while (!Declare->use_empty()) { CallInst *CI = cast<CallInst>(Declare->use_back()); - Value *Arg1 = CI->getOperand(1); - Value *Arg2 = CI->getOperand(2); + Value *Arg1 = CI->getArgOperand(0); + Value *Arg2 = CI->getArgOperand(1); assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); if (Arg1->use_empty()) { @@ -295,3 +316,83 @@ bool StripDebugDeclare::runOnModule(Module &M) { return true; } + +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// printer to not emit usual symbol prefix before the symbol name is used then +/// return linkage name after skipping this special LLVM prefix. 
+static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +bool StripDeadDebugInfo::runOnModule(Module &M) { + bool Changed = false; + + // Debugging infomration is encoded in llvm IR using metadata. This is designed + // such a way that debug info for symbols preserved even if symbols are + // optimized away by the optimizer. This special pass removes debug info for + // such symbols. + + // llvm.dbg.gv keeps track of debug info for global variables. + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + SmallVector<MDNode *, 8> MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DIGlobalVariable(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), + true)) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(*I); + } + else + Changed = true; + } + } + + // llvm.dbg.sp keeps track of debug info for subprograms. + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) { + SmallVector<MDNode *, 8> MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DISubprogram(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + bool FnIsLive = false; + if (Function *F = DISubprogram(*I).getFunction()) + if (M.getFunction(F->getName())) + FnIsLive = true; + if (FnIsLive) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(*I); + } else { + // Remove llvm.dbg.lv.fnname named mdnode which may have been used + // to hold debug info for dead function's local variables. + StringRef FName = DISubprogram(*I).getLinkageName(); + if (FName.empty()) + FName = DISubprogram(*I).getName(); + if (NamedMDNode *LVNMD = + M.getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) + LVNMD->eraseFromParent(); + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp index 473e83c..a74686f 100644 --- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp @@ -107,12 +107,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - NumRejectedSRETUses++; + ++NumRejectedSRETUses; return 0; } DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - NumSRET++; + ++NumSRET; // [1] Replace use of sret parameter AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", F->getEntryBlock().begin()); @@ -171,16 +171,16 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { // Check FirstArg's users. for (Value::use_iterator ArgI = FirstArg->use_begin(), ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - + User *U = *ArgI; // If FirstArg user is a CallInst that does not correspond to current // call site then this function F is not suitable for sret promotion. 
- if (CallInst *CI = dyn_cast<CallInst>(ArgI)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { if (CI != Call) return false; } // If FirstArg user is a GEP whose all users are not LoadInst then // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(ArgI)) { + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // TODO : Use dom info and insert PHINodes to collect get results // from multiple call sites for this GEP. if (GEP->getParent() != Call->getParent()) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index c7b04a4..24e0528 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -178,7 +178,8 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitFree(Instruction &FI); + Instruction *visitMalloc(Instruction &FI); + Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8586054..5876f40 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -472,6 +472,25 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> + // (icmp eq (A & (C1|C2)), (C1|C2)) + if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Instruction *I1 = dyn_cast<Instruction>(Val); + Instruction *I2 = dyn_cast<Instruction>(Val2); + if (I1 && I1->getOpcode() == Instruction::And && + I2 && I2->getOpcode() == Instruction::And && + I1->getOperand(0) == I1->getOperand(0)) { + ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1)); + ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1)); + if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() && + CI1->getValue().operator&(CI2->getValue()) == 0) { + Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); + Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr); + return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); + } + } + } } // From here on, we only handle: @@ -1584,6 +1603,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if ((match(A, m_Not(m_Specific(B))) && match(D, m_Not(m_Specific(C))))) return BinaryOperator::CreateXor(C, B); + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(A, m_Or(m_Value(V1), m_Specific(B))) || + match(A, m_Or(m_Specific(B), m_Value(V1)))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); + if (Ret) return Ret; + } + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(B, m_Or(m_Specific(A), m_Value(V1))) || + match(B, m_Or(m_Value(V1), m_Specific(A)))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); + if (Ret) return Ret; + } } // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 
@@ -1599,19 +1631,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - // (~A | ~B) == (~(A & B)) - De Morgan's Law if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 38e7b6e..85251a8 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -112,8 +112,8 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -125,7 +125,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. - ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); + ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); if (MemOpLength == 0) return 0; // Source and destination pointer types are always "i8*" for intrinsic. See @@ -140,9 +140,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = - cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace(); + cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); unsigned DstAddrSp = - cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace(); + cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); @@ -154,8 +154,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // an i64 load+store, here because this improves the odds that the source or // dest address will be promotable. See if we can find a better type than the // integer datatype. 
- Value *StrippedDest = MI->getOperand(1)->stripPointerCasts(); - if (StrippedDest != MI->getOperand(1)) { + Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); + if (StrippedDest != MI->getArgOperand(0)) { const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { @@ -189,15 +189,15 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy); + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign); InsertNewInstBefore(L, *MI); InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); return MI; } @@ -250,6 +250,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); + if (isMalloc(&CI)) + return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -261,7 +263,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); - + // Intrinsics cannot occur in an invoke, so handle them here instead of in // visitCallSite. if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { @@ -287,11 +289,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getOperand(1)->getType(), - CI.getOperand(2)->getType(), - CI.getOperand(3)->getType() }; - CI.setCalledFunction( - Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + const Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } } @@ -311,7 +312,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Instruction *I = SimplifyMemSet(MSI)) return I; } - + if (Changed) return II; } @@ -322,10 +323,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (!TD) break; const Type *ReturnTy = CI.getType(); - bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); // Get to the real allocated thing and offset as fast as possible. - Value *Op1 = II->getOperand(1)->stripPointerCasts(); + Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); // If we've stripped down to a single global variable that we // can know the size of then just return that. @@ -393,7 +394,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); return ReplaceInstUsesWith(CI, RetVal); - } // Do not return "I don't know" here. 
Later optimization passes could @@ -402,45 +402,45 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::bswap: // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) + if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getOperand(1)); + return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) - if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { + if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) { unsigned C = Operand->getType()->getPrimitiveSizeInBits() - TI->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(Operand->getType(), C); - Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); + Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV); return new TruncInst(V, TI->getType()); } } break; case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 if (Power->isZero()) return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); // powi(x, 1) -> x if (Power->isOne()) - return ReplaceInstUsesWith(CI, II->getOperand(1)); + return ReplaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x if (Power->isAllOnesValue()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), - II->getOperand(1)); + II->getArgOperand(0)); } break; case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); @@ -453,11 +453,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. 
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); @@ -468,8 +468,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -513,19 +513,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // FALL THROUGH uadd into sadd case Intrinsic::sadd_with_overflow: // Canonicalize constants into the RHS. - if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); return II; } // X + undef -> undef - if (isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { @@ -533,7 +533,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; @@ -541,38 +541,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ssub_with_overflow: // undef - X -> undef // X - undef -> undef - if (isa<UndefValue>(II->getOperand(1)) || - isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(0)) || + isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X - 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. 
- if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); return II; } // X * undef -> undef - if (isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X*0 -> {0, false} if (RHSI->isZero()) return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); @@ -580,11 +580,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X * 1 -> {X, false} if (RHSI->equalsInt(1)) { Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; @@ -595,8 +595,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_loadu_dq: // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), + if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -604,22 +604,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { + if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(1)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); - return new StoreInst(II->getOperand(1), Ptr); + PointerType::getUnqual(II->getArgOperand(0)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + return new StoreInst(II->getArgOperand(0), Ptr); } break; case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { + if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(2)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); - return new StoreInst(II->getOperand(2), Ptr); + PointerType::getUnqual(II->getArgOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); + return new StoreInst(II->getArgOperand(1), Ptr); } break; @@ -627,12 +627,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // These intrinsics only demands the 0th element of its input vector. If // we can simplify the input based on that, do so now. 
unsigned VWidth = - cast<VectorType>(II->getOperand(1)->getType())->getNumElements(); + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); APInt DemandedElts(VWidth, 1); APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, + if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts)) { - II->setOperand(1, V); + II->setArgOperand(0, V); return II; } break; @@ -640,7 +640,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) { + if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); // Check that all of the elements are integer constants or undefs. @@ -655,8 +655,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); + Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -689,7 +689,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. - if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { + if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { if (SS->getIntrinsicID() == Intrinsic::stacksave) { BasicBlock::iterator BI = SS; if (&*++BI == II) @@ -772,13 +772,13 @@ protected: NewInstruction = IC->ReplaceInstUsesWith(*CI, With); } bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) { + if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) { if (SizeCI->isAllOnesValue()) return true; if (isString) return SizeCI->getZExtValue() >= - GetStringLength(CI->getOperand(SizeArgOp)); - if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp))) + GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)); + if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); } return false; @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), CS.getInstruction()); - // If CS dues not return void then replaceAllUsesWith undef. + // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. 
if (!CS.getInstruction()->getType()->isVoidTy()) CS.getInstruction()-> @@ -1140,7 +1140,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { IntrinsicInst *Tramp = cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); - Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts()); + Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); @@ -1181,7 +1181,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { do { if (Idx == NestIdx) { // Add the chain argument and attributes. - Value *NestVal = Tramp->getOperand(3); + Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); NewArgs.push_back(NestVal); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index b0137c4..505a0bf 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -22,19 +22,18 @@ using namespace PatternMatch; /// X*Scale+Offset. /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset) { - assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); + uint64_t &Offset) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; - return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); + return ConstantInt::get(Val->getType(), 0); } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. - Scale = 1U << RHS->getZExtValue(); + Scale = UINT64_C(1) << RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } @@ -100,7 +99,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; - int ArrayOffset; + uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); @@ -114,13 +113,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (Scale == 1) { Amt = NumElements; } else { - Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); + Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale); // Insert before the alloca, not before the cast. 
Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); } - if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 861cf92..6c00586 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1423,7 +1423,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap())); return &ICI; case Intrinsic::ctlz: @@ -1431,7 +1431,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // ctz(A) == bitwidth(a) -> A == 0 and likewise for != if (RHSV == RHS->getType()->getBitWidth()) { Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); return &ICI; } @@ -1440,13 +1440,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // popcount(A) == 0 -> A == 0 and likewise for != if (RHS->isZero()) { Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, RHS); return &ICI; } break; default: - break; + break; } } } @@ -1924,35 +1924,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } break; } - case Instruction::Call: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (isMalloc(LHSI) && LHSI->hasOneUse() && - isa<ConstantPointerNull>(RHSC)) { - // Need to explicitly erase malloc call here, instead of adding it to - // Worklist, because it won't get DCE'd from the Worklist since - // isInstructionTriviallyDead() returns false for function calls. - // It is OK to replace LHSI/MallocCall with Undef because the - // instruction that uses it will be erased via Worklist. - if (extractMallocCall(LHSI)) { - LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); - EraseInstFromFunction(*LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) - if (MallocCall->hasOneUse()) { - MallocCall->replaceAllUsesWith( - UndefValue::get(MallocCall->getType())); - EraseInstFromFunction(*MallocCall); - Worklist.Add(LHSI); // The malloc's bitcast use. 
- return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - } - break; case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0f2a24f..8933a0b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -22,6 +23,18 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + // Ensure that the alloca array size argument has type intptr_t, so that + // any casting is exposed early. + if (TD) { + const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = Builder->CreateIntCast(AI.getArraySize(), + IntPtrTy, false); + AI.setOperand(0, V); + return &AI; + } + } + // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { @@ -352,10 +365,11 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { return 0; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI)) + User *U = *UI; + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) return DI; - if (isa<BitCastInst>(UI) && UI->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin())) + if (isa<BitCastInst>(U) && U->hasOneUse()) { + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin())) return DI; } } @@ -511,17 +525,20 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); + BasicBlock *P = *PI; BasicBlock *OtherBB = 0; - if (*PI != StoreBB) - OtherBB = *PI; - ++PI; - if (PI == pred_end(DestBB)) + + if (P != StoreBB) + OtherBB = P; + + if (++PI == pred_end(DestBB)) return false; - if (*PI != StoreBB) { + P = *PI; + if (P != StoreBB) { if (OtherBB) return false; - OtherBB = *PI; + OtherBB = P; } if (++PI != pred_end(DestBB)) return false; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 65f0393..f7fc62f 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -230,8 +230,9 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { bool isAddressTaken = false; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) { - if (isa<LoadInst>(UI)) continue; - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + User *U = *UI; + if (isa<LoadInst>(U)) continue; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // If storing TO the alloca, then the address isn't taken. 
if (SI->getOperand(1) == AI) continue; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index c958cde..f9ffdb1 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -329,6 +329,37 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 + // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1 + // FIXME: Type and constness constraints could be lifted, but we have to + // watch code size carefully. We should consider xor instead of + // sub/add when we decide to do that. + if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (TrueVal->getType() == Ty) { + if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { + ConstantInt *C1 = NULL, *C2 = NULL; + if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + C1 = dyn_cast<ConstantInt>(TrueVal); + C2 = dyn_cast<ConstantInt>(FalseVal); + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + C1 = dyn_cast<ConstantInt>(FalseVal); + C2 = dyn_cast<ConstantInt>(TrueVal); + } + if (C1 && C2) { + // This shift results in either -1 or 0. + Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + + // Check if we can express the operation with a single or. + if (C2->isAllOnesValue()) + return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + + Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); + return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + } + } + } + } + if (CmpLHS == TrueVal && CmpRHS == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (Pred == ICmpInst::ICMP_EQ) @@ -668,6 +699,34 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.setOperand(2, TrueVal); return &SI; } + + // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T + // Note: This is a canonicalization rather than an optimization, and is used + // to expose opportunities to other instcombine transforms. 
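// --- Illustrative sketch, not part of the patch ---------------------------
// Scalar C++ models of the two select rewrites added in the
// InstCombineSelect.cpp hunks nearby: the sign-mask form of
//   (X >s -1) ? C1 : C2  -->  ((X >>s 31) & (C2 - C1)) + C1
// and the De Morgan canonicalization
//   select (A == 0 | B == 0), T, F  -->  select (A != 0 & B != 0), F, T.
// Assumes 32-bit integers and an arithmetic right shift on signed values
// (true of mainstream compilers; implementation-defined before C++20).
#include <cassert>
#include <cstdint>

static uint32_t SignMaskSelect(int32_t X, uint32_t C1, uint32_t C2) {
  uint32_t Mask = static_cast<uint32_t>(X >> 31); // 0 if X >= 0, ~0u if X < 0
  return (Mask & (C2 - C1)) + C1;                 // C1 if X >= 0, C2 if X < 0
}

static int CanonicalizedSelect(int A, int B, int T, int F) {
  return (A != 0 && B != 0) ? F : T;              // condition negated, arms swapped
}

int main() {
  for (int32_t X : {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX})
    assert(SignMaskSelect(X, 5, 9) == (X > -1 ? 5u : 9u));
  for (int A : {0, 3})
    for (int B : {0, 4})
      assert(CanonicalizedSelect(A, B, 1, 2) == ((A == 0 || B == 0) ? 1 : 2));
  return 0;
}
// ---------------------------------------------------------------------------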
+ Instruction* CondInst = dyn_cast<Instruction>(CondVal); + if (CondInst && CondInst->hasOneUse() && + CondInst->getOpcode() == Instruction::Or) { + ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0)); + ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1)); + if (LHSCmp && LHSCmp->hasOneUse() && + LHSCmp->getPredicate() == ICmpInst::ICMP_EQ && + RHSCmp && RHSCmp->hasOneUse() && + RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) { + ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1)); + ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1)); + if (C1 && C1->isZero() && C2 && C2->isZero()) { + LHSCmp->setPredicate(ICmpInst::ICMP_NE); + RHSCmp->setPredicate(ICmpInst::ICMP_NE); + Value *And = + InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp, + "and."+CondVal->getName()), SI); + SI.setOperand(0, And); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + } + } return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 836bda3..e5ce8a6 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -404,7 +404,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); - Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS); + Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); return new ZExtInst(Cmp, II->getType()); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index cd41844..adf7a76 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -732,10 +732,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // the right place. Instruction *NewVal; if (InputBit > ResultBit) - NewVal = BinaryOperator::CreateLShr(I->getOperand(1), + NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0), ConstantInt::get(I->getType(), InputBit-ResultBit)); else - NewVal = BinaryOperator::CreateShl(I->getOperand(1), + NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); return InsertNewInstBefore(NewVal, *I); @@ -1052,12 +1052,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_mul_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: - TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth+1); - if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, UndefElts2, Depth+1); - if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } // If only the low elt is demanded and this is a scalarizable intrinsic, // scalarize it now. 
@@ -1069,8 +1069,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_sub_sd: case Intrinsic::x86_sse2_mul_sd: // TODO: Lower MIN/MAX/ABS/etc - Value *LHS = II->getOperand(1); - Value *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0); + Value *RHS = II->getArgOperand(1); // Extract the element as scalars. LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index af9ec5c..af2958f 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -710,8 +710,55 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } -Instruction *InstCombiner::visitFree(Instruction &FI) { - Value *Op = FI.getOperand(1); + + +static bool IsOnlyNullComparedAndFreed(const Value &V) { + for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (isFreeCall(U)) + continue; + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + continue; + return false; + } + return true; +} + +Instruction *InstCombiner::visitMalloc(Instruction &MI) { + // If we have a malloc call which is only used in any amount of comparisons + // to null and free calls, delete the calls and replace the comparisons with + // true or false as appropriate. + if (IsOnlyNullComparedAndFreed(MI)) { + for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end(); + UI != UE;) { + // We can assume that every remaining use is a free call or an icmp eq/ne + // to null, so the cast is safe. + Instruction *I = cast<Instruction>(*UI); + + // Early increment here, as we're about to get rid of the user. + ++UI; + + if (isFreeCall(I)) { + EraseInstFromFunction(*cast<CallInst>(I)); + continue; + } + // Again, the cast is safe. + ICmpInst *C = cast<ICmpInst>(I); + ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + EraseInstFromFunction(*C); + } + return EraseInstFromFunction(MI); + } + return 0; +} + + + +Instruction *InstCombiner::visitFree(CallInst &FI) { + Value *Op = FI.getArgOperand(0); // free undef -> unreachable. if (isa<UndefValue>(Op)) { @@ -726,23 +773,6 @@ Instruction *InstCombiner::visitFree(Instruction &FI) { if (isa<ConstantPointerNull>(Op)) return EraseInstFromFunction(FI); - // If we have a malloc call whose only use is a free call, delete both. - if (isMalloc(Op)) { - if (CallInst* CI = extractMallocCallFromBitCast(Op)) { - if (Op->hasOneUse() && CI->hasOneUse()) { - EraseInstFromFunction(FI); - EraseInstFromFunction(*CI); - return EraseInstFromFunction(*cast<Instruction>(Op)); - } - } else { - // Op is a call to malloc - if (Op->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*cast<Instruction>(Op)); - } - } - } - return 0; } @@ -896,7 +926,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which // allows us to simplify multiple result intrinsics to simpler things that - // just get one value.. + // just get one value. 
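// --- Illustrative sketch, not part of the patch ---------------------------
// The extractvalue combine that follows replaces an *.with.overflow
// intrinsic with the plain arithmetic instruction when only element 0 (the
// result) is extracted and the overflow bit is never used. Scalar analogue,
// assuming the GCC/Clang __builtin_add_overflow builtin:
#include <cassert>

static int AddKeepingOnlyResult(int A, int B) {
  int Result;
  (void)__builtin_add_overflow(A, B, &Result); // overflow flag discarded
  return Result;                               // same value a plain wrapping add produces
}

int main() {
  assert(AddKeepingOnlyResult(2, 3) == 5);
  assert(AddKeepingOnlyResult(-10, 4) == -6);
  return 0;
}
// ---------------------------------------------------------------------------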
if (II->hasOneUse()) { // Check if we're grabbing the overflow bit or the result of a 'with // overflow' intrinsic. If it's the latter we can remove the intrinsic @@ -905,7 +935,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); @@ -914,7 +944,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateSub(LHS, RHS); @@ -923,7 +953,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateMul(LHS, RHS); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 5650150..41e3a39 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -143,7 +143,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { printEdgeCounter(edge,entry,i); - IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ Initializer[i++] = (Uncounted); @@ -166,7 +166,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { printEdgeCounter(edge,BB,i); - IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ Initializer[i++] = (Uncounted); @@ -189,11 +189,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block printEdgeCounter(edge,BB,i); - IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the block printEdgeCounter(edge,Succ,i); - IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); } else { diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 8662a82..1a30e9b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ 
b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -61,8 +61,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), - "newargc", InsertPos); + CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), + "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. Function::arg_iterator AI; @@ -73,10 +73,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, if (AI->getType() != ArgVTy) { Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, false); - InitCall->setOperand(2, + InitCall->setArgOperand(1, CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); } else { - InitCall->setOperand(2, AI); + InitCall->setArgOperand(1, AI); } /* FALL THROUGH */ @@ -93,12 +93,12 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } opcode = CastInst::getCastOpcode(AI, true, Type::getInt32Ty(Context), true); - InitCall->setOperand(1, + InitCall->setArgOperand(0, CastInst::Create(opcode, AI, Type::getInt32Ty(Context), "argc.cast", InitCall)); } else { AI->replaceAllUsesWith(InitCall); - InitCall->setOperand(1, AI); + InitCall->setArgOperand(0, AI); } case 0: break; diff --git a/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp b/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp index 6135992..dcf14a6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp @@ -230,7 +230,7 @@ class ABCD : public FunctionPass { DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); for (; begin != end; ++begin) { - begin->second.clear(); + begin->second.clear(); } map.clear(); } @@ -396,8 +396,8 @@ class ABCD : public FunctionPass { /// this case the method returns true, otherwise false. It also obtains the /// Instruction and ConstantInt from the BinaryOperator and returns it. bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); + Instruction **I2, ConstantInt **C1, + ConstantInt **C2); /// This method creates a constraint between a Sigma and an Instruction. 
/// These constraints are created as soon as we find a comparator that uses a diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp index 5a49841..2d19467 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) { for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { - NumRemoved++; + ++NumRemoved; (*I)->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 93e9bfb..272066c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -548,7 +548,8 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) + if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp + - CallInst::ArgOffset))) return SizeCI->isAllOnesValue(); return false; } @@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { - bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); const Type *ReturnTy = CI->getType(); Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); CI->replaceAllUsesWith(RetVal); @@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, } // Compute the constraint code and ConstraintType to use. 
- TLI->ComputeConstraintToUse(OpInfo, SDValue(), - OpInfo.ConstraintType == TargetLowering::C_Memory); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 09c01d3..e047e4f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -56,7 +56,8 @@ namespace { } bool runOnBasicBlock(BasicBlock &BB); - bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep); + bool handleFreeWithNonTrivialDependency(const CallInst *F, + MemDepResult Dep); bool handleEndBlock(BasicBlock &BB); bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, BasicBlock::iterator &BBI, @@ -73,7 +74,6 @@ namespace { AU.addRequired<AliasAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addPreserved<DominatorTree>(); - AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) - return MI->getOperand(1); - - switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + return MI->getArgOperand(0); + + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: - return I->getOperand(1); + return II->getArgOperand(0); case Intrinsic::lifetime_end: - return I->getOperand(2); + return II->getArgOperand(1); } } @@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { Len = MI->getLength(); } else { - switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: return -1u; case Intrinsic::lifetime_end: - Len = I->getOperand(1); + Len = II->getArgOperand(0); break; } } @@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (InstDep.isNonLocal()) continue; // Handle frees whose dependencies are non-trivial. - if (isFreeCall(Inst)) { - MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep); + if (const CallInst *F = isFreeCall(Inst)) { + MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep); continue; } @@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { isElidable(DepStore)) { // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepStore); - NumFastStores++; + ++NumFastStores; MadeChange = true; // DeleteDeadInstruction can delete the current instruction in loop @@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. 
--BBI; - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { /// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose /// dependency is a store to a field of that structure. -bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { +bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F, + MemDepResult Dep) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); Instruction *Dependency = Dep.getInst(); @@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); // Check for aliasing. - if (AA.alias(F->getOperand(1), 1, DepPointer, 1) != + if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) != AliasAnalysis::MustAlias) return false; // DCE instructions only used to calculate that store DeleteDeadInstruction(Dependency); - NumFastStores++; + ++NumFastStores; return true; } @@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (deadPointers.count(pointerOperand)) { // DCE instructions only used to calculate that store. Instruction *Dead = BBI; - BBI++; + ++BBI; DeleteDeadInstruction(Dead, &deadPointers); - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // However, if this load is unused and not volatile, we can go ahead and // remove it, and not have to worry about it making our pointer undead! if (L->use_empty() && !L->isVolatile()) { - BBI++; + ++BBI; DeleteDeadInstruction(L, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; continue; } @@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Dead alloca's can be DCE'd when we reach them if (A->use_empty()) { - BBI++; + ++BBI; DeleteDeadInstruction(A, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; } @@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { getPointerSize(*I)); if (A == AliasAnalysis::ModRef) - modRef++; + ++modRef; else - other++; + ++other; if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) dead.push_back(*I); @@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } else if (isInstructionTriviallyDead(BBI)) { // For any non-memory-affecting non-terminators, DCE them as we reach them Instruction *Inst = BBI; - BBI++; + ++BBI; DeleteDeadInstruction(Inst, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; continue; } @@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, // Remove it! 
++BBI; DeleteDeadInstruction(S, &deadPointers); - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index ca8ab49..88b6776 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" @@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) { e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); + CallSite CS(C); + for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); @@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { if (local_dep.isDef()) { CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - if (local_cdep->getNumOperands() != C->getNumOperands()) { + if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { + if (cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, MD->invalidateCachedPointerInfo(V); VN.erase(LI); toErase.push_back(LI); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, MD->invalidateCachedPointerInfo(V); VN.erase(LI); toErase.push_back(LI); - NumPRELoad++; + ++NumPRELoad; return true; } @@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(AvailVal); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(StoredVal); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(DepLI); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1871,7 
+1873,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(UndefValue::get(L->getType())); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(UndefValue::get(L->getType())); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } } @@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) { BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); - if (removedBlock) NumGVNBlocks++; + if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; } @@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; // We're not interested in PRE where the block is its // own predecessor, or in blocks with predecessors // that are not reachable. - if (*PI == CurrentBlock) { + if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!localAvail.count(*PI)) { + } else if (!localAvail.count(P)) { NumWithout = 2; break; } DenseMap<uint32_t, Value*>::iterator predV = - localAvail[*PI]->table.find(ValNo); - if (predV == localAvail[*PI]->table.end()) { - PREPred = *PI; - NumWithout++; + localAvail[P]->table.find(ValNo); + if (predV == localAvail[P]->table.end()) { + PREPred = P; + ++NumWithout; } else if (predV->second == CurInst) { NumWithout = 2; } else { - predMap[*PI] = predV->second; - NumWith++; + predMap[P] = predV->second; + ++NumWith; } } @@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) { PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); - NumGVNPRE++; + ++NumGVNPRE; // Update the availability map to include the new instruction. localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); @@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) { CurInst->getName() + ".pre-phi", CurrentBlock->begin()); for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) - Phi->addIncoming(predMap[*PI], *PI); + PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; + Phi->addIncoming(predMap[P], P); + } VN.add(Phi, ValNo); localAvail[CurrentBlock]->table[ValNo] = Phi; diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 36bea67..b5c9dd8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() { } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + // If LoopSimplify form is not available, stay out of trouble. Some notes: + // - LSR currently only supports LoopSimplify-form loops. Indvars' + // canonicalization can be a pessimization without LSR to "clean up" + // afterwards. + // - We depend on having a preheader; in particular, + // Loop::getCanonicalInductionVariable only supports loops with preheaders, + // and we're in trouble if we can't find the induction variable even when + // we've manually inserted one. 
+ if (!L->isLoopSimplifyForm()) + return false; + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); @@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { bool UsedInLoop = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - BasicBlock *UseBB = cast<Instruction>(UI)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(UI)) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { unsigned i = PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); UseBB = P->getIncomingBlock(i); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index df05b71..edce14c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -18,6 +18,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // Perhaps getConstantOnEdge should be smart enough to do this? for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. - Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB); + Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); if (PredCst == 0 || (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst))) continue; - Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI)); + Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P)); } return !Result.empty(); @@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { - Result.push_back(RHSVals[i]); - Result.back().first = InterestingVal; + // If we already inferred a value for this block on the LHS, don't + // re-add it. + bool HasValue = false; + for (unsigned r = 0, e = Result.size(); r != e; ++r) + if (Result[r].second == RHSVals[i].second) { + HasValue = true; + break; + } + + if (!HasValue) { + Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } } return !Result.empty(); } @@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); - + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. 
LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, *PI, BB); + RHSCst, P, BB); if (Res == LazyValueInfo::Unknown) continue; Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI)); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); } - + return !Result.empty(); } } @@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. pred_iterator PI = pred_begin(BB), E = pred_end(BB); if (isa<BranchInst>(BB->getTerminator())) { - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) if (PBI->isConditional() && PBI->getCondition() == Condition && - ProcessBranchOnDuplicateCond(*PI, BB)) + ProcessBranchOnDuplicateCond(P, BB)) return true; + } } else { assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator"); - for (; PI != E; ++PI) - if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator())) if (PSI->getCondition() == Condition && - ProcessSwitchOnDuplicateCond(*PI, BB)) + ProcessSwitchOnDuplicateCond(P, BB)) return true; + } } } @@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // If we have a comparison, loop over the predecessors to see if there is // a condition with a lexically identical value. pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI->isConditional() && *PI != BB) { + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) + if (PBI->isConditional() && P != BB) { if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { if (CI->getOperand(0) == CondCmp->getOperand(0) && CI->getOperand(1) == CondCmp->getOperand(1) && CI->getPredicate() == CondCmp->getPredicate()) { // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(*PI, BB)) + if (ProcessBranchOnDuplicateCond(P, BB)) return true; } } } + } } } @@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Add all the unavailable predecessors to the PredsToSplit list. for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) - if (!AvailablePredSet.count(*PI)) - PredsToSplit.push_back(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + // If the predecessor is an indirect goto, we can't split the edge. + if (isa<IndirectBrInst>(P->getTerminator())) + return false; + + if (!AvailablePredSet.count(P)) + PredsToSplit.push_back(P); + } // Split them out to their own block. UnavailablePred = @@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // have multiple entries here. 
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { + BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(*PI, (Value*)0)); + std::make_pair(P, (Value*)0)); - assert(I != AvailablePreds.end() && I->first == *PI && + assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); PN->addIncoming(I->second, I->first); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 48817ab..e4894e9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; - BI++; + ++BI; } // Make sure that no instructions in the block have potential side-effects. @@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast<PHINode>(BI)) { P->replaceUsesOfWith(exitingBlock, preheader); - BI++; + ++BI; } // Update the dominator tree and remove the instructions and blocks that will @@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { LPM.deleteLoopFromQueue(L); Changed = true; - NumDeleted++; + ++NumDeleted; return Changed; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp index 101ff5b..31058e5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() { } } } - NumRestrictBounds++; + ++NumRestrictBounds; return true; } @@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() { continue; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { + BI != BE; ++BI) { Instruction *Inst = BI; if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst) - && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) + && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) return false; } } @@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() { BSV = getMax(BSV, IVStartValue, Sign, PHTerm); // [*] Clone Loop - DenseMap<const Value *, Value *> ValueMap; - Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this); + ValueMap<const Value *, Value *> VMap; + Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this); Loop *ALoop = L; // [*] ALoop's exiting edge enters BLoop's header. // ALoop's original exit block becomes BLoop's exit block. - PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]); + PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]); BasicBlock *A_ExitingBlock = ExitCondition->getParent(); BranchInst *A_ExitInsn = dyn_cast<BranchInst>(A_ExitingBlock->getTerminator()); @@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() { for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), BE = ALoop->getHeader()->end(); BI != BE; ++BI) { if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PHINode *PNClone = cast<PHINode>(ValueMap[PN]); + PHINode *PNClone = cast<PHINode>(VMap[PN]); InverseMap[PNClone] = PN; } else break; @@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() { // block. Remove incoming PHINode values from ALoop's exiting block. // Add new incoming values from BLoop's incoming exiting value. // Update BLoop exit block's dominator info.. 
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]); + BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]); for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end(); BI != BE; ++BI) { if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)], + PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], B_ExitingBlock); PN->removeIncomingValue(A_ExitingBlock); } else @@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() { removeBlocks(A_InactiveBranch, L, A_ActiveBranch); //[*] Eliminate split condition's inactive branch in from BLoop. - BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]); + BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]); BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator()); BasicBlock *B_InactiveBranch = NULL; BasicBlock *B_ActiveBranch = NULL; @@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() { //[*] Move exit condition into split condition block to avoid // executing dead loop iteration. - ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]); - Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]); - ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]); + ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]); + Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]); + ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]); moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition, cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, @@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() { B_SplitCondition, B_IndVar, B_IndVarIncrement, BLoop, EVOpNum); - NumIndexSplit++; + ++NumIndexSplit; return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 5004483..16c4a15 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { continue; // PHI nodes don't count. if (isa<DbgInfoIntrinsic>(OI)) continue; // Debug intrinsics don't count as size. - Size++; + ++Size; } if (Size > MAX_HEADER_SIZE) @@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { preserveCanonicalLoopForm(LPM); - NumRotated++; + ++NumRotated; return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 86ea3eb..1f9b415 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } -/// isMulSExtable - Return true if the given add can be sign-extended +/// isMulSExtable - Return true if the given mul can be sign-extended /// without changing its value. 
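// --- Illustrative sketch, not part of the patch ---------------------------
// Worked check of the widening rule behind the isMulSExtable change here:
// a product of N signed B-bit values always fits in N*B bits, so
// sign-extending the multiply to that width cannot change its value.
// Exhaustive verification for N = 2, B = 8 (16-bit result):
#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      int32_t Wide = a * b;                        // exact product
      int16_t Narrow = static_cast<int16_t>(Wide); // 2 * 8 = 16 bits
      assert(Narrow == Wide);                      // nothing truncated
    }
  return 0;
}
// ---------------------------------------------------------------------------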
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { +static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); - return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); + return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy)); } /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined @@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (LHS == RHS) return SE.getConstant(LHS->getType(), 1); - // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some - // folding. - if (RHS->isAllOnesValue()) - return SE.getMulExpr(LHS, RHS); + // Handle a few RHS special cases. + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (RC) { + const APInt &RA = RC->getValue()->getValue(); + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do + // some folding. + if (RA.isAllOnesValue()) + return SE.getMulExpr(LHS, RC); + // Handle x /s 1 as x. + if (RA == 1) + return LHS; + } // Check for a division of a constant by a constant. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { - const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); if (!RC) return 0; - if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + const APInt &LA = C->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + if (LA.srem(RA) != 0) return 0; - return SE.getConstant(C->getValue()->getValue() - .sdiv(RC->getValue()->getValue())); + return SE.getConstant(LA.sdiv(RA)); } // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. @@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (!Step) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } + return 0; } // Distribute the sdiv over add operands, if the add doesn't overflow. @@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } return SE.getAddExpr(Ops); } + return 0; } // Check for a multiply operand that we can pull RHS out of. - if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) { if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { SmallVector<const SCEV *, 4> Ops; bool Found = false; @@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } return Found ? SE.getMulExpr(Ops) : 0; } + return 0; + } // Otherwise we don't know. return 0; @@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - if (II->getOperand(1) == OperandVal) + if (II->getArgOperand(0) == OperandVal) isAddress = true; break; } @@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - AccessTy = II->getOperand(1)->getType(); + AccessTy = II->getArgOperand(0)->getType(); break; } } @@ -976,6 +989,8 @@ public: void dump() const; }; +} + /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. 
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { @@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S, return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); } +namespace { + +/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind. +struct UseMapDenseMapInfo { + static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() { + return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic); + } + + static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() { + return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic); + } + + static unsigned + getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) { + unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first); + Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second)); + return Result; + } + + static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS, + const std::pair<const SCEV *, LSRUse::KindType> &RHS) { + return LHS == RHS; + } +}; + /// FormulaSorter - This class implements an ordering for formulae which sorts /// the by their standalone cost. class FormulaSorter { @@ -1275,7 +1316,9 @@ class LSRInstance { } // Support for sharing of LSRUses between LSRFixups. - typedef DenseMap<const SCEV *, size_t> UseMapTy; + typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>, + size_t, + UseMapDenseMapInfo> UseMapTy; UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, @@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); + else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS)) + NewRHS = SU->getValue(); else - llvm_unreachable("Max doesn't match expected pattern!"); + // Max doesn't match expected pattern. + return Cond; // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. @@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, NewMaxOffset = NewOffset; } // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) NewAccessTy = Type::getVoidTy(AccessTy->getContext()); @@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr, } std::pair<UseMapTy::iterator, bool> P = - UseMap.insert(std::make_pair(Expr, 0)); + UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0)); if (!P.second) { // A use already existed with this base. 
size_t LUIdx = P.first->second; @@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + Worklist.append(Add->op_begin(), Add->op_end()); } } while (!Worklist.empty()); } @@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { const SCEV *S = Worklist.pop_back_val(); if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) - Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + Worklist.append(N->op_begin(), N->op_end()); else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) Worklist.push_back(C->getOperand()); else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { @@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (!Inserted.insert(U)) continue; const Value *V = U->getValue(); - if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + // Look for instructions defined outside the loop. if (L->contains(Inst)) continue; + } else if (isa<UndefValue>(V)) + // Undef doesn't have a live range, so it doesn't matter. + continue; for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const Instruction *UserInst = dyn_cast<Instruction>(*UI); @@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, + SmallVectorImpl<const SCEV *> &UninterestingOps, + const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, SE); + CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), C, Ops, SE); - CollectSubexprs(AR->getStart(), C, Ops, SE); + AR->getLoop()), + C, Ops, UninterestingOps, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, SE); + Ops, UninterestingOps, L, SE); return; } } - // Otherwise use the value itself. - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself. Loop-variant "unknown" values are + // uninteresting; we won't be able to do anything meaningful with them. + if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) + UninterestingOps.push_back(S); + else + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps; - CollectSubexprs(BaseReg, 0, AddOps, SE); + SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; + CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); + + // Add any uninteresting values as one register, as we won't be able to + // form any interesting reassociation opportunities with them. They'll + // just have to be added inside the loop no matter what we do. + if (!UninterestingAddOps.empty()) + AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), @@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, continue; // Collect all operands except *J. - SmallVector<const SCEV *, 8> InnerAddOps; - for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(), - KE = AddOps.end(); K != KE; ++K) - if (K != J) - InnerAddOps.push_back(*K); + SmallVector<const SCEV *, 8> InnerAddOps + ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + InnerAddOps.append + (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. @@ -2297,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // TODO: For now, just add the min and max offset, because it usually isn't // worthwhile looking at everything inbetween. - SmallVector<int64_t, 4> Worklist; + SmallVector<int64_t, 2> Worklist; Worklist.push_back(LU.MinOffset); if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); @@ -2311,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { - F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // If it cancelled out, drop the base register, otherwise update it. + if (NewG->isZero()) { + std::swap(F.BaseRegs[i], F.BaseRegs.back()); + F.BaseRegs.pop_back(); + } else + F.BaseRegs[i] = NewG; (void)InsertFormula(LU, LUIdx, F); } @@ -2350,13 +2422,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, for (SmallSetVector<int64_t, 8>::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Formula F = Base; // Check that the multiplication doesn't overflow. - if (F.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) continue; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; + if (NewBaseOffs / Factor != Base.AM.BaseOffs) continue; // Check that multiplying with the use offset doesn't overflow. @@ -2367,6 +2438,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, if (Offset / Factor != LU.MinOffset) continue; + Formula F = Base; + F.AM.BaseOffs = NewBaseOffs; + // Check that this scale is legal. 
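// --- Illustrative sketch, not part of the patch ---------------------------
// Hypothetical standalone helper modeling the wrap check used above when
// GenerateICmpZeroScales multiplies the base offset by a factor: do the
// multiply in unsigned arithmetic, divide back, and compare. INT64_MIN * -1
// is rejected up front because dividing that product back would itself
// overflow.
#include <cassert>
#include <cstdint>

static bool ScaleOffset(int64_t Base, int64_t Factor, int64_t &Out) {
  if (Factor == 0 || (Base == INT64_MIN && Factor == -1))
    return false;
  int64_t Scaled = static_cast<int64_t>(
      static_cast<uint64_t>(Base) * static_cast<uint64_t>(Factor));
  if (Scaled / Factor != Base)
    return false;                                  // the multiplication wrapped
  Out = Scaled;
  return true;
}

int main() {
  int64_t R;
  assert(ScaleOffset(100, -4, R) && R == -400);
  assert(!ScaleOffset(INT64_MAX, 2, R));           // wraps, rejected
  assert(!ScaleOffset(INT64_MIN, -1, R));          // special-cased
  return 0;
}
// ---------------------------------------------------------------------------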
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index ae7bf40..0c900ff 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // This is a very ad-hoc heuristic. if (Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold || - Metrics.NeverInline) { + Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " << currentLoop->getBlocks().size() << "\n"); @@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { } // RemapInstruction - Convert the instruction operands from referencing the -// current values into those specified by ValueMap. +// current values into those specified by VMap. // static inline void RemapInstruction(Instruction *I, - DenseMap<const Value *, Value*> &ValueMap) { + ValueMap<const Value *, Value*> &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) Op = It->second; + ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); + if (It != VMap.end()) Op = It->second; I->setOperand(op, Op); } } /// CloneLoop - Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. -static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM, +static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); LPM->insertLoop(New, PL); @@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); - DenseMap<const Value*, Value*> ValueMap; + ValueMap<const Value*, Value*> VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { - BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); - ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } @@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. - Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM); + Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop @@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, } for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]); + BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. 
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); @@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V); - if (It != ValueMap.end()) V = It->second; + ValueMap<const Value *, Value*>::iterator It = VMap.find(V); + if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } } @@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3611b8e..0e566c5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } @@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { if (MD.getDependency(C) == dep) { MD.removeInstruction(M); M->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 5aca9cdc..98452f5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - if (!BinaryOperator::isNeg(*UI)) continue; + User *U = *UI; + if (!BinaryOperator::isNeg(U)) continue; // We found one! Now we have to make sure that the definition dominates // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. - BinaryOperator *TheNeg = cast<BinaryOperator>(*UI); + BinaryOperator *TheNeg = cast<BinaryOperator>(U); // Verify that the negate is in this function, V might be a constant expr. 
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 5ca9ce3..dd445f6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, DeleteDeadInstructions(); AI->eraseFromParent(); - NumReplaced++; + ++NumReplaced; } /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, @@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, isSafeGEP(GEPI, AI, GEPOffset, Info); if (!Info.isUnsafe) isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); + UI.getOperandNo() == CallInst::ArgOffset, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { + unsigned AddrSpace = + cast<PointerType>(OtherPtr->getType())->getAddressSpace(); // Remove bitcasts and all-zero GEPs from OtherPtr. This is an // optimization, but it's also required to detect the corner case where @@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // OtherPtr may be a bitcast or GEP that currently being rewritten. (This // function is only called for mem intrinsics that access the whole // aggregate, so non-zero GEPs are not an issue here.) - while (1) { - if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) { - OtherPtr = BC->getOperand(0); - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) { - // All zero GEPs are effectively bitcasts. - if (GEP->hasAllZeroIndices()) { - OtherPtr = GEP->getOperand(0); - continue; - } - } - break; - } + OtherPtr = OtherPtr->stripPointerCasts(); + // Copying the alloca to itself is a no-op: just delete it. if (OtherPtr == AI || OtherPtr == NewElts[0]) { // This code will run twice for a no-op memcpy -- once for each operand. @@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, return; } - if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) - if (BCE->getOpcode() == Instruction::BitCast) - OtherPtr = BCE->getOperand(0); - // If the pointer is not the right type, insert a bitcast to the right // type. - if (OtherPtr->getType() != AI->getType()) - OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), - MI); + const Type *NewTy = + PointerType::get(AI->getType()->getElementType(), AddrSpace); + + if (OtherPtr->getType() != NewTy) + OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); } // Process each element of the aggregate. @@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the stored element is zero (common case), just store a null // constant. 
Constant *StoreVal; - if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) { if (CI->isZero()) { StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { @@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size // Align ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), MI->getVolatileCst() @@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } else { assert(isa<MemSetInst>(MI)); Value *Ops[] = { - EltPtr, MI->getOperand(2), // Dest, Value, - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + EltPtr, MI->getArgOperand(1), // Dest, Value, + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size Zero, // Align ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; @@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } - ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + // Don't create an 'or x, 0' on the first iteration. + if (!isa<Constant>(ResultVal) || + !cast<Constant>(ResultVal)->isNullValue()) + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + else + ResultVal = SrcField; } // Handle tail padding by truncating the result @@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 1) return false; + if (UI.getOperandNo() != CallInst::ArgOffset) return false; // If the source of the memcpy/move is not a constant global, reject it. if (!PointsToConstantGlobal(MI->getSource())) diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9744100..49d93a2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -137,6 +137,9 @@ static bool MarkAliveBlocks(BasicBlock *BB, // they should be changed to unreachable by passes that can't modify the // CFG. if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + Value *Ptr = SI->getOperand(1); if (isa<UndefValue>(Ptr) || diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7414be7..b1c6191 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -66,6 +66,11 @@ public: this->TD = TD; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + return CallOptimizer(CI->getCalledFunction(), CI, B); } }; @@ -92,6 +97,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. 
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// @@ -110,8 +129,8 @@ struct StrCatOpt : public LibCallOptimization { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); uint64_t Len; // We don't do anything if length is not constant - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else return 0; @@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization { FT->getParamType(0) != FT->getReturnType()) return 0; - Value *SrcStr = CI->getOperand(1); + Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2)); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); if (CharC == 0) { // These optimizations require TargetData. if (!TD) return 0; @@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; - return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), B, TD); } @@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. 
const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); // Get the length argument if it is constant. uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else return 0; @@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); @@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; @@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization { if (OptChkCall) EmitMemCpyChk(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getOperand(3), B, TD); + CI->getArgOperand(2), B, TD); else EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), @@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); - Value *LenOp = CI->getOperand(3); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); @@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy()) return 0; - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) @@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization { !FT->getParamType(1)->isPointerTy()) return 0; - Value *EndPtr = CI->getOperand(2); + Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { CI->setOnlyReadsMemory(); CI->addAttribute(1, Attribute::NoCapture); @@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization { return 0; // fold strstr(x, x) -> x. 
- if (CI->getOperand(1) == CI->getOperand(2)) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD); + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + Old->replaceAllUsesWith(Cmp); + Old->eraseFromParent(); + } + return CI; + } // See if either input string is a constant string. std::string SearchStr, ToFindStr; - bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); - bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr); // fold strstr(x, "") -> x. if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { @@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getOperand(1), B); + Value *Result = CastToCStr(CI->getArgOperand(0), B); Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32)) return 0; - Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); // Make sure we have a constant length. 
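For illustration only (an editor's aside, not part of the patch): the StrStrOpt folds above rest on the following source-level identities of the C library calls, written here as plain C++ assertions:

#include <cassert>
#include <cstring>

void strstr_identities(const char *a, const char *b, const char *x) {
  // strstr(x, x) -> x and strstr(x, "") -> x.
  assert(strstr(x, x) == x);
  assert(strstr(x, "") == x);
  // strstr(x, "y") matches strchr(x, 'y') for a one-character needle.
  assert(strstr(x, "y") == strchr(x, 'y'));
  // strstr(a, b) == a holds exactly when b is a prefix of a, i.e. when
  // strncmp(a, b, strlen(b)) == 0, which is what the equality-comparison
  // fold above exploits.
  assert((strstr(a, b) == a) == (strncmp(a, b, strlen(b)) == 0));
}

Note that the pass only forms the strncmp replacement when TargetData is available, since the emitted strlen/strncmp calls need the target's intptr type.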
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!LenC) return 0; uint64_t Len = LenC->getZExtValue(); @@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - return CI->getOperand(1); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + return CI->getArgOperand(0); } }; @@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 return Op1C; @@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = B.CreateSExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } if (LdExpArg) { @@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { return 0; // If this is something like 'floor((double)floatval)', convert to floorf. - FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) return 0; @@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Constant fold. 
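Also purely illustrative: the Exp2Opt rewrite above relies on exp2 of an integral value being a power-of-two scale, which is exactly what ldexp computes. A tiny sketch, assuming a faithful libm and ignoring overflow/underflow at the extremes:

#include <cassert>
#include <cmath>

void exp2_is_ldexp(int n) {
  // exp2(sitofp(n)) -> ldexp(1.0, sext(n)): for in-range n both sides
  // evaluate to the power of two 2^n.
  assert(std::exp2(static_cast<double>(n)) == std::ldexp(1.0, n));
}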
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { @@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization { Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); return B.CreateSelect(Cond, V, - ConstantInt::get(Type::getInt32Ty(*Context), 0)); + ConstantInt::get(Type::getInt32Ty(*Context), 0)); } }; @@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization { return 0; // isdigit(c) -> (c-'0') <u 10 - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), "isdigittmp"); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), @@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c <u 128 - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128), "isascii"); return B.CreateZExt(Op, CI->getType()); @@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization { return 0; // abs(x) -> x >s -1 ? x : -x - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); @@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c & 0x7f - return B.CreateAnd(CI->getOperand(1), + return B.CreateAnd(CI->getArgOperand(0), ConstantInt::get(CI->getType(),0x7F)); } }; @@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) return 0; // Empty format string -> noop. @@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization { } // Optimize specific format strings. - // printf("%c", chr) --> putchar(*(i8*)dst) - if (FormatStr == "%c" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getOperand(2), B, TD); + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isPointerTy() && + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B, TD); + EmitPutS(CI->getArgOperand(1), B, TD); return CI; } return 0; @@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. 
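The ctype-style folds above (IsDigitOpt, IsAsciiOpt, AbsOpt, ToAsciiOpt) encode branch-free identities; a short C++ restatement for reference (illustrative, not from the patch):

#include <cassert>
#include <climits>

void ctype_folds(int c, int x) {
  // isdigit(c) -> (c - '0') <u 10 for character-range c: the unsigned
  // compare also rejects everything below '0'.
  assert(((unsigned)(c - '0') < 10u) == (c >= '0' && c <= '9'));
  // isascii(c) -> c <u 128: a negative c becomes a huge unsigned value.
  assert(((unsigned)c < 128u) == (c >= 0 && c <= 127));
  // toascii(c) -> c & 0x7f simply keeps the low seven bits.
  // abs(x) -> x >s -1 ? x : -x (undefined for INT_MIN, as with abs itself).
  if (x != INT_MIN)
    assert((x > -1 ? x : -x) == (x < 0 ? -x : x));
}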
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); @@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization { // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), Type::getInt8Ty(*Context), "char"); - Value *Ptr = CastToCStr(CI->getOperand(1), B); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), "nul"); @@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization { return 0; // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); @@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B, TD); + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization { return 0; // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getOperand(1)); + uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getOperand(1), + EmitFWrite(CI->getArgOperand(0), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B, TD); + CI->getArgOperand(1), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. 
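As context for the sprintf/printf rewrites above (illustration only): the folds correspond to these source-level equivalences, spelled out here with the standard C library:

#include <cstdio>
#include <cstring>

void sprintf_folds(char *dst, const char *str, int chr) {
  // printf("%c", chr)   -> putchar(chr)
  // printf("%s\n", str) -> puts(str)
  // sprintf(dst, "%s", str) writes strlen(str)+1 bytes (including the nul)
  // and returns strlen(str), hence the memcpy + strlen rewrite:
  size_t n = strlen(str);
  memcpy(dst, str, n + 1);
  // sprintf(dst, "%c", chr) just stores the character and a terminator,
  // returning 1:
  dst[0] = (char)chr;
  dst[1] = '\0';
}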
std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. @@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getOperand(2), + EmitFWrite(CI->getArgOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B, TD); + CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) -> fputs(str, F) - if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); + EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return CI; } return 0; diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp index 2306a77..9208238 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp @@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, // there is only one other pred, get it, otherwise we can't handle it. PI = pred_begin(DstBlock); PE = pred_end(DstBlock); BasicBlock *DstOtherPred = 0; - if (*PI == SrcBlock) { + BasicBlock *P = *PI; + if (P == SrcBlock) { if (++PI == PE) return 0; DstOtherPred = *PI; if (++PI != PE) return 0; } else { - DstOtherPred = *PI; + DstOtherPred = P; if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; } diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5ad5de2..01c8e5d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -16,9 +16,9 @@ // transformation from taking place, though currently the analysis cannot // support moving any really useful instructions (only dead ones). // 2. This pass transforms functions that are prevented from being tail -// recursive by an associative expression to use an accumulator variable, -// thus compiling the typical naive factorial or 'fib' implementation into -// efficient code. 
+// recursive by an associative and commutative expression to use an +// accumulator variable, thus compiling the typical naive factorial or +// 'fib' implementation into efficient code. // 3. TRE is performed if the function returns void, if the return // returns the result returned by the call, or if the function returns a // run-time constant on all exits from the function. It is possible, though @@ -60,6 +60,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" @@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // If we are passing this argument into call as the corresponding // argument operand, then the argument is dynamically constant. // Otherwise, we cannot transform this function safely. - if (CI->getOperand(ArgNo+1) == Arg) + if (CI->getArgOperand(ArgNo) == Arg) return true; } @@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { } // getCommonReturnValue - Check to see if the function containing the specified -// return instruction and tail call consistently returns the same -// runtime-constant value at all exit points. If so, return the returned value. +// tail call consistently returns the same runtime-constant value at all exit +// points except for IgnoreRI. If so, return the returned value. // -static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { - Function *F = TheRI->getParent()->getParent(); +static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { + Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != TheRI) { + if (RI != IgnoreRI) { Value *RetOp = RI->getOperand(0); // We can only perform this transformation if the value returned is @@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return 0; assert(I->getNumOperands() == 2 && - "Associative operations should have 2 args!"); + "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction... if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || @@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } - // If we are introducing accumulator recursion to eliminate associative - // operations after the call instruction, this variable contains the initial - // value for the accumulator. If this value is set, we actually perform - // accumulator recursion elimination instead of simple tail recursion - // elimination. + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the initial + // value for the accumulator is placed in this variable. If this value is set + // then we actually perform accumulator recursion elimination instead of + // simple tail recursion elimination. If the operation is an LLVM instruction + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. 
If not, then + // we are handling the case when the return instruction returns a constant C + // which is different to the constant returned by other return instructions + // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a + // special case of accumulator recursion, the operation being "return C". Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; @@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) if (!CanMoveAboveCall(BBI, CI)) { // If we can't move the instruction above the call, it might be because it - // is an associative operation that could be tranformed using accumulator - // recursion elimination. Check to see if this is the case, and if so, - // remember the initial accumulator value for later. + // is an associative and commutative operation that could be tranformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction @@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(Ret, CI)) - return false; + !getCommonReturnValue(0, CI)) { + // One case remains that we are able to handle: the current return + // instruction returns a constant, and all other return instructions + // return a different constant. + if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) + return false; // Current return instruction does not return a constant. + // Check that all other return instructions return a common constant. If + // so, record it in AccumulatorRecursionEliminationInitVal. + AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); + if (!AccumulatorRecursionEliminationInitVal) + return false; + } // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. @@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. - for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i) - ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB); + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion @@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. 
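To make the accumulator-recursion comments above concrete, here is an illustrative sketch in plain C++ (not the pass's actual IR output) of the classic factorial case before and after the transformation:

// Before: the multiply after the recursive call blocks plain tail-call
// elimination, because work remains once the callee returns.
unsigned fact(unsigned n) {
  if (n == 0) return 1;
  return n * fact(n - 1);
}

// After accumulator recursion elimination, conceptually: the associative
// and commutative multiply is threaded through an accumulator (the role
// played by the "accumulator.tr" PHI node created above) and the call
// becomes a loop.
unsigned fact_acc(unsigned n) {
  unsigned Acc = 1;          // AccumulatorRecursionEliminationInitVal
  for (; n != 0; --n)
    Acc *= n;                // accumulator updated each trip
  return Acc;
}

The new code path in the hunk additionally treats "return C" (a constant differing from the constant returned on the other exits) as a degenerate form of the same accumulator pattern.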
- PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr", - OldEntry->begin()); + PHINode *AccPN = + PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, @@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // it will not show up as a predecessor. for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); PI != PE; ++PI) { - if (*PI == &F->getEntryBlock()) - AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI); + BasicBlock *P = *PI; + if (P == &F->getEntryBlock()) + AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else - AccPN->addIncoming(AccPN, *PI); + AccPN->addIncoming(AccPN, P); } - // Add an incoming argument for the current block, which is computed by our - // associative accumulator instruction. - AccPN->addIncoming(AccRecInstr, BB); - - // Next, rewrite the accumulator recursion instruction so that it does not - // use the result of the call anymore, instead, use the PHI node we just - // inserted. - AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + if (AccRecInstr) { + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does not + // use the result of the call anymore, instead, use the PHI node we just + // inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + } else { + // Add an incoming argument for the current block, which is just the + // constant returned by the current return instruction. + AccPN->addIncoming(Ret->getReturnValue(), BB); + } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp index ea9d1c1..4d64c85 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -381,29 +381,28 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI) { std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); - - unsigned ArgNo = 1; // ArgNo - The operand of the CallInst. + + unsigned ArgNo = 0; // The argument of the CallInst. for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo OpInfo(Constraints[i]); - + // Compute the value type for each operand. switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. break; } - + // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue(), - OpInfo.ConstraintType == TargetLowering::C_Memory); - + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! 
if (OpInfo.CallOperandVal == OpVal && @@ -411,7 +410,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, !OpInfo.isIndirect)) return false; } - + return true; } @@ -450,7 +449,7 @@ static bool FindAllMemoryUses(Instruction *I, if (CallInst *CI = dyn_cast<CallInst>(U)) { InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); - if (IA == 0) return true; + if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 2f1ae00..ec625b4 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -558,121 +558,3 @@ void llvm::FindFunctionBackedges(const Function &F, } - - - -/// AreEquivalentAddressValues - Test if A and B will obviously have the same -/// value. This includes recognizing that %t0 and %t1 will have the same -/// value in code like this: -/// %t0 = getelementptr \@a, 0, 3 -/// store i32 0, i32* %t0 -/// %t1 = getelementptr \@a, 0, 3 -/// %t2 = load i32* %t1 -/// -static bool AreEquivalentAddressValues(const Value *A, const Value *B) { - // Test if the values are trivially equivalent. - if (A == B) return true; - - // Test if the values come from identical arithmetic instructions. - // Use isIdenticalToWhenDefined instead of isIdenticalTo because - // this function is only used when one address use dominates the - // other, which means that they'll always either have the same - // value or one of them will have an undefined value. - if (isa<BinaryOperator>(A) || isa<CastInst>(A) || - isa<PHINode>(A) || isa<GetElementPtrInst>(A)) - if (const Instruction *BI = dyn_cast<Instruction>(B)) - if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) - return true; - - // Otherwise they may not be equivalent. - return false; -} - -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the -/// instruction before ScanFrom) checking to see if we have the value at the -/// memory address *Ptr locally available within a small number of instructions. -/// If the value is available, return it. -/// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't find -/// something that invalidates *Ptr or provides it, ScanFrom would be left at -/// begin() and this returns null. ScanFrom could also be left -/// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. If -/// it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. -Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, - BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, - AliasAnalysis *AA) { - if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; - - // If we're using alias analysis to disambiguate get the size of *Ptr. - unsigned AccessSize = 0; - if (AA) { - const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - AccessSize = AA->getTypeStoreSize(AccessTy); - } - - while (ScanFrom != ScanBB->begin()) { - // We must ignore debug info directives when counting (otherwise they - // would affect codegen). 
- Instruction *Inst = --ScanFrom; - if (isa<DbgInfoIntrinsic>(Inst)) - continue; - - // Restore ScanFrom to expected value in case next test succeeds - ScanFrom++; - - // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; - - --ScanFrom; - // If this is a load of Ptr, the loaded value is available. - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) - return LI; - - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // If this is a store through Ptr, the value is available! - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) - return SI->getOperand(0); - - // If Ptr is an alloca and this is a store to a different alloca, ignore - // the store. This is a trivial form of alias analysis that is important - // for reg2mem'd code. - if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) && - (isa<AllocaInst>(SI->getOperand(1)) || - isa<GlobalVariable>(SI->getOperand(1)))) - continue; - - // If we have alias analysis and it says the store won't modify the loaded - // value, ignore the store. - if (AA && - (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // Otherwise the store that may or may not alias the pointer, bail out. - ++ScanFrom; - return 0; - } - - // If this is some other instruction that may clobber Ptr, bail out. - if (Inst->mayWriteToMemory()) { - // If alias analysis claims that it really won't modify the load, - // ignore it. - if (AA && - (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // May modify the pointer, bail out. - ++ScanFrom; - return 0; - } - } - - // Got to the start of the block, we didn't find it, but are done for this - // block. - return 0; -} - diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8c25ad1..26f53c0 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -106,11 +106,12 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { - if (*I != FirstPred) + const BasicBlock *P = *I; + if (P != FirstPred) return true; // Note: leave this as is until no one ever compiles with either gcc 4.0.1 // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(*I); + E = pred_end(P); ++I; } return false; @@ -277,11 +278,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); - I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } } - + bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? @@ -400,11 +403,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); - I != E; ++I) - if (TIL->contains(*I)) - Preds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (TIL->contains(P)) + Preds.push_back(P); else HasPredOutsideOfLoop = true; + } // If there are any preds not in the loop, we'll need to split // the edges. 
The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 767fa3a..7a9d007 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -69,6 +69,31 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, return CI; } +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, @@ -112,10 +137,10 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); return B.CreateCall5(MemCpy, Dst, Src, Len, ConstantInt::get(B.getInt32Ty(), Align), ConstantInt::get(B.getInt1Ty(), isVolatile)); @@ -395,11 +420,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -418,11 +443,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ 
-436,12 +461,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - replaceCall(CI->getOperand(1)); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -462,8 +487,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFoldable(3, 2, true)) { - Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD, + if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, Name.substr(2, 6)); replaceCall(Ret); return true; @@ -479,10 +504,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { !FT->getParamType(2)->isIntegerTy() || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), B, TD, Name.substr(2, 7)); + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, Name.substr(2, 7)); replaceCall(Ret); return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 6d4fe4b..1dcfd57 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -32,7 +32,7 @@ using namespace llvm; // CloneBasicBlock - See comments in Cloning.h BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); @@ -47,7 +47,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { @@ -72,7 +72,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // ArgMap values. 
// void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -80,17 +80,17 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - assert(ValueMap.count(I) && "No mapping from source argument specified!"); + assert(VMap.count(I) && "No mapping from source argument specified!"); #endif // Clone any attributes. if (NewFunc->arg_size() == OldFunc->arg_size()) NewFunc->copyAttributesFrom(OldFunc); else { - //Some arguments were deleted with the ValueMap. Copy arguments one by one + //Some arguments were deleted with the VMap. Copy arguments one by one for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(ValueMap[I])) + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() @@ -111,43 +111,43 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc, + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - ValueMap[&BB] = CBB; // Add basic block mapping. + VMap[&BB] = CBB; // Add basic block mapping. if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) Returns.push_back(RI); } // Loop over all of the instructions in the function, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. // - for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]), + for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, ValueMap); + RemapInstruction(II, VMap); } /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified -/// in the ValueMap are changed to refer to their mapped value instead of the -/// original one. If any of the arguments to the function are in the ValueMap, -/// the arguments are deleted from the resultant function. The ValueMap is +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is /// updated to include mappings from all of the instructions and basicblocks in /// the function from their old to new values. /// Function *llvm::CloneFunction(const Function *F, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo) { std::vector<const Type*> ArgTypes; // The user might be deleting arguments to the function by specifying them in - // the ValueMap. If so, we need to not add the arguments to the arg ty vector + // the VMap. 
If so, we need to not add the arguments to the arg ty vector // for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet? + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? ArgTypes.push_back(I->getType()); // Create a new function type... @@ -161,13 +161,13 @@ Function *llvm::CloneFunction(const Function *F, Function::arg_iterator DestI = NewF->arg_begin(); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) { // Is this argument preserved? + if (VMap.count(I) == 0) { // Is this argument preserved? DestI->setName(I->getName()); // Copy the name over... - ValueMap[I] = DestI++; // Add mapping to ValueMap + VMap[I] = DestI++; // Add mapping to VMap } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo); return NewF; } @@ -179,19 +179,19 @@ namespace { struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; - DenseMap<const Value*, Value*> &ValueMap; + ValueToValueMapTy &VMap; SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - DenseMap<const Value*, Value*> &valueMap, + ValueToValueMapTy &valueMap, SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) - : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } @@ -202,7 +202,7 @@ namespace { public: /// ConstantFoldMappedInstruction - Constant fold the specified instruction, - /// mapping its operands through ValueMap if they are available. + /// mapping its operands through VMap if they are available. Constant *ConstantFoldMappedInstruction(const Instruction *I); }; } @@ -211,7 +211,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone){ - Value *&BBEntry = ValueMap[BB]; + Value *&BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -230,7 +230,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If this instruction constant folds, don't bother cloning the instruction, // instead, just add the constant to the value map. if (Constant *C = ConstantFoldMappedInstruction(II)) { - ValueMap[II] = C; + VMap[II] = C; continue; } @@ -238,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { @@ -258,12 +258,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... if (Cond == 0) - Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]); + Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]); // Constant fold to uncond branch! 
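Note: the CloneFunction.cpp changes are almost entirely the rename from DenseMap<const Value*, Value*> to ValueToValueMapTy, which now resolves to a ValueMap (see the ValueMapper.h hunk at the end of this diff). Unlike a plain DenseMap, a ValueMap keys its entries through value handles, so a mapping follows its key if the original value is RAUW'd and drops out if it is deleted. A caller-side sketch under that assumption; the helper name is hypothetical and the header paths are as assumed for this tree:

    #include "llvm/Function.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static Function *cloneWithMap(Function *F) {
      ValueToValueMapTy VMap;              // ValueMap<const Value*, Value*>
      Function *NewF = CloneFunction(F, VMap, /*CodeInfo=*/0);
      // VMap now maps every original argument, block and instruction to its
      // clone, and the entries stay coherent while the originals are mutated.
      return NewF;
    }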
if (Cond) { BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -272,10 +272,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); if (Cond == 0) // Or known constant after constant prop in the callee... - Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]); + Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]); if (Cond) { // Constant fold to uncond branch! BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -286,7 +286,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (OldTI->hasName()) NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[OldTI] = NewInst; // Add instruction map to value. + VMap[OldTI] = NewInst; // Add instruction map to value. // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); @@ -307,13 +307,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } /// ConstantFoldMappedInstruction - Constant fold the specified instruction, -/// mapping its operands through ValueMap if they are available. +/// mapping its operands through VMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - ValueMap))) + VMap))) Ops.push_back(Op); else return 0; // All operands not constant! @@ -363,7 +363,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, @@ -374,10 +374,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) - assert(ValueMap.count(II) && "No mapping from source argument specified!"); + assert(VMap.count(II) && "No mapping from source argument specified!"); #endif - PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. @@ -397,14 +397,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { - BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); + BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. 
NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); @@ -455,7 +455,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, I->setMetadata(DbgKind, 0); } } - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); } } @@ -465,19 +465,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); - BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; - PHINode *PN = cast<PHINode>(ValueMap[OPN]); + PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = - cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { + cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), - ValueMap); + VMap); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); @@ -531,15 +531,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); - assert(ValueMap[OldI] == PN && "ValueMap mismatch"); - ValueMap[OldI] = NV; + assert(VMap[OldI] == PN && "VMap mismatch"); + VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of - // ValueMap. Single entry phi nodes can have multiple ValueMap entries - // pointing at them. Thus, deleting one would require scanning the ValueMap + // VMap. Single entry phi nodes can have multiple VMap entries + // pointing at them. Thus, deleting one would require scanning the VMap // to update any entries in it that would require that. This would be // really slow. } @@ -548,14 +548,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]); + Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would - // require scanning the ValueMap to update any entries that point to the phi + // require scanning the VMap to update any entries that point to the phi // node. 
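Note: unlike the plain cloners, the pruning cloner does not guarantee a one-to-one clone for every original value: an unreachable block never gets a VMap entry, a foldable instruction maps to a Constant, and a conditional terminator can map to a plain unconditional branch. Downstream code therefore reads the map defensively, which is why these hunks lean on cast_or_null and dyn_cast_or_null. A fragment-style sketch, assuming a filled VMap and an original value OldV are in scope:

    Value *Mapped = VMap[OldV];      // null if OldV was pruned as unreachable
    if (BasicBlock *NewBB = dyn_cast_or_null<BasicBlock>(Mapped)) {
      // a genuinely cloned block, safe to patch up
    } else if (Constant *C = dyn_cast_or_null<Constant>(Mapped)) {
      // the original instruction constant-folded away during cloning
    }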
BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { diff --git a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp index 38928dc..551b630 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp @@ -15,7 +15,6 @@ #include "llvm/BasicBlock.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/ADT/DenseMap.h" using namespace llvm; @@ -23,13 +22,13 @@ using namespace llvm; /// CloneDominatorInfo - Clone basicblock's dominator tree and, if available, /// dominance info. It is expected that basic block is already cloned. static void CloneDominatorInfo(BasicBlock *BB, - DenseMap<const Value *, Value *> &ValueMap, + ValueMap<const Value *, Value *> &VMap, DominatorTree *DT, DominanceFrontier *DF) { assert (DT && "DominatorTree is not available"); - DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB); - assert (BI != ValueMap.end() && "BasicBlock clone is missing"); + ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB); + assert (BI != VMap.end() && "BasicBlock clone is missing"); BasicBlock *NewBB = cast<BasicBlock>(BI->second); // NewBB already got dominator info. @@ -43,11 +42,11 @@ static void CloneDominatorInfo(BasicBlock *BB, // NewBB's dominator is either BB's dominator or BB's dominator's clone. BasicBlock *NewBBDom = BBDom; - DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom); - if (BBDomI != ValueMap.end()) { + ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom); + if (BBDomI != VMap.end()) { NewBBDom = cast<BasicBlock>(BBDomI->second); if (!DT->getNode(NewBBDom)) - CloneDominatorInfo(BBDom, ValueMap, DT, DF); + CloneDominatorInfo(BBDom, VMap, DT, DF); } DT->addNewBlock(NewBB, NewBBDom); @@ -60,8 +59,8 @@ static void CloneDominatorInfo(BasicBlock *BB, for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end(); I != E; ++I) { BasicBlock *DB = *I; - DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB); - if (IDM != ValueMap.end()) + ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB); + if (IDM != VMap.end()) NewDFSet.insert(cast<BasicBlock>(IDM->second)); else NewDFSet.insert(DB); @@ -71,10 +70,10 @@ static void CloneDominatorInfo(BasicBlock *BB, } } -/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap +/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap /// using old blocks to new blocks mapping. 
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, - DenseMap<const Value *, Value *> &ValueMap, Pass *P) { + ValueMap<const Value *, Value *> &VMap, Pass *P) { DominatorTree *DT = NULL; DominanceFrontier *DF = NULL; @@ -104,8 +103,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone"); - ValueMap[BB] = NewBB; + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone"); + VMap[BB] = NewBB; if (P) LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L); NewLoop->addBasicBlockToLoop(NewBB, LI->getBase()); @@ -117,7 +116,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - CloneDominatorInfo(BB, ValueMap, DT, DF); + CloneDominatorInfo(BB, VMap, DT, DF); } // Process sub loops @@ -125,7 +124,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, LoopNest.push_back(*I); } while (!LoopNest.empty()); - // Remap instructions to reference operands from ValueMap. + // Remap instructions to reference operands from VMap. for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(), NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) { BasicBlock *NB = *NBItr; @@ -135,8 +134,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (unsigned index = 0, num_ops = Insn->getNumOperands(); index != num_ops; ++index) { Value *Op = Insn->getOperand(index); - DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op); - if (OpItr != ValueMap.end()) + ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op); + if (OpItr != VMap.end()) Insn->setOperand(index, OpItr->second); } } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index b87c082..fc603d2 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -28,12 +28,12 @@ using namespace llvm; Module *llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. - DenseMap<const Value*, Value*> ValueMap; - return CloneModule(M, ValueMap); + ValueToValueMapTy VMap; + return CloneModule(M, VMap); } Module *llvm::CloneModule(const Module *M, - DenseMap<const Value*, Value*> &ValueMap) { + ValueToValueMapTy &VMap) { // First off, we need to create the new module... Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); @@ -51,7 +51,7 @@ Module *llvm::CloneModule(const Module *M, New->addLibrary(*I); // Loop over all of the global variables, making corresponding globals in the - // new module. Here we add them to the ValueMap and to the new Module. We + // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. 
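Note: CloneModule keeps its two-pass shape: the first pass creates empty declarations for every global, function and alias while filling VMap, and the second pass fills in initializers, bodies and aliasees through that map, so forward references always resolve. Only the map type changes here, plus the new metadata handling added at the end of the function. A usage sketch; the helper name is illustrative:

    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static Module *copyModule(const Module *M) {
      ValueToValueMapTy VMap;          // original global/function -> clone
      Module *New = CloneModule(M, VMap);
      // VMap can now be queried, e.g. to find the clone of a given function.
      return New;
    }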
// for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); @@ -62,7 +62,7 @@ Module *llvm::CloneModule(const Module *M, GlobalValue::ExternalLinkage, 0, I->getName()); GV->setAlignment(I->getAlignment()); - ValueMap[I] = GV; + VMap[I] = GV; } // Loop over the functions in the module, making external functions as before @@ -71,13 +71,13 @@ Module *llvm::CloneModule(const Module *M, Function::Create(cast<FunctionType>(I->getType()->getElementType()), GlobalValue::ExternalLinkage, I->getName(), New); NF->copyAttributesFrom(I); - ValueMap[I] = NF; + VMap[I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) - ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, + VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, I->getName(), NULL, New); // Now that all of the things that global variable initializer can refer to @@ -86,10 +86,10 @@ Module *llvm::CloneModule(const Module *M, // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]); + GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - ValueMap))); + VMap))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -98,17 +98,17 @@ Module *llvm::CloneModule(const Module *M, // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast<Function>(ValueMap[I]); + Function *F = cast<Function>(VMap[I]); if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - ValueMap[J] = DestI++; + VMap[J] = DestI++; } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, ValueMap, Returns); + CloneFunctionInto(F, I, VMap, Returns); } F->setLinkage(I->getLinkage()); @@ -117,11 +117,37 @@ Module *llvm::CloneModule(const Module *M, // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]); + GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); + GA->setAliasee(cast<Constant>(MapValue(C, VMap))); } - + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + SmallVector<MDNode*, 4> MDs; + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap))); + NamedMDNode::Create(New->getContext(), NMD.getName(), + MDs.data(), MDs.size(), New); + } + + // Update metadata attach with instructions. 
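Note: the block added below is new behaviour, not a rename. Previously the cloned module's named metadata and any metadata attached to cloned instructions still pointed at values in the original module. Running each MDNode through MapValue with the module-level VMap rebuilds nodes whose operands referenced an old global or function; nodes that come back unchanged are left attached as-is. A per-instruction sketch of the same idea, with the "dbg" kind chosen only as an example:

    if (MDNode *Old = Inst->getMetadata("dbg"))
      if (Value *Mapped = MapValue(Old, VMap))
        if (Mapped != Old)                    // only re-attach real changes
          Inst->setMetadata("dbg", cast<MDNode>(Mapped));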
+ for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); + FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs; + BI->getAllMetadata(MDs); + for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator + MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) { + Value *MappedValue = MapValue(MDI->second, VMap); + if (MDI->second != MappedValue && MappedValue) + BI->setMetadata(MDI->first, cast<MDNode>(MappedValue)); + } + } return New; } diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index c908b4a..8e82a02 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,7 +35,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.eraseFromParent(); return 0; } - + // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { @@ -46,7 +46,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Change all of the users of the instruction to read from the stack slot // instead. while (!I.use_empty()) { @@ -67,7 +67,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Value *&V = Loads[PN->getIncomingBlock(i)]; if (V == 0) { // Insert the load into the predecessor block - V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); } PN->setIncomingValue(i, V); @@ -110,8 +110,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, /// The phi node is deleted and it returns the pointer to the alloca inserted. AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { - P->eraseFromParent(); - return 0; + P->eraseFromParent(); + return 0; } // Create a stack slot to hold the value. @@ -124,23 +124,23 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Iterate over each operand, insert store in each predecessor. for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { - assert(II->getParent() != P->getIncomingBlock(i) && + assert(II->getParent() != P->getIncomingBlock(i) && "Invoke edge not supported yet"); II=II; } - new StoreInst(P->getIncomingValue(i), Slot, + new StoreInst(P->getIncomingValue(i), Slot, P->getIncomingBlock(i)->getTerminator()); } - + // Insert load in place of the phi and replace all uses. Value *V = new LoadInst(Slot, P->getName()+".reload", P); P->replaceAllUsesWith(V); - + // Delete phi. P->eraseFromParent(); - + return Slot; } diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 91390bc..598e7d2 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -63,7 +63,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Next, create the new invoke instruction, inserting it at the end // of the old basic block. 
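Note: the next hunk stops slicing the raw operand list (op_begin()+1 hard-codes the callee being operand 0) and instead asks a call site for its argument range; arg_begin()/arg_end() bracket exactly the call arguments no matter where the callee is stored. The same idea as a standalone sketch, assuming the CallSite.h location in this tree and a hypothetical helper name:

    #include "llvm/Support/CallSite.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Works for CallInst and InvokeInst alike, independent of operand layout.
    static void collectArgs(const Instruction *I, SmallVectorImpl<Value*> &Args) {
      ImmutableCallSite CS(I);
      Args.append(CS.arg_begin(), CS.arg_end());
    }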
- SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); + ImmutableCallSite CS(CI); + SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, InvokeArgs.begin(), InvokeArgs.end(), @@ -169,7 +170,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, - DenseMap<const Value*, Value*> &ValueMap, + ValueMap<const Value*, Value*> &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; const Function *Caller = CS.getInstruction()->getParent()->getParent(); @@ -192,9 +193,9 @@ static void UpdateCallGraphAfterInlining(CallSite CS, for (; I != E; ++I) { const Value *OrigCall = I->first; - DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall); + ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == ValueMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == 0) continue; // If the call was inlined, but then constant folded, there is no edge to @@ -285,8 +286,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; - { // Scope to destroy ValueMap after cloning. - DenseMap<const Value*, Value*> ValueMap; + { // Scope to destroy VMap after cloning. + ValueMap<const Value*, Value*> VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -351,16 +352,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; + + // Calls that we inline may use the new alloca, so we need to clear + // their 'tail' flags. + MustClearTailCallFlags = true; } - ValueMap[I] = ActualArg; + VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. @@ -368,7 +373,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Update the callgraph if requested. 
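Note: the new MustClearTailCallFlags assignment above handles the byval case: once the inliner copies a byval argument into a caller-side alloca, calls inside the inlined body that were marked 'tail' may now reference the caller's stack, which a tail call must never do. The flag is consumed later in InlineFunction by stripping 'tail' from the inlined calls; roughly, as a simplified sketch rather than a quote of that loop:

    if (MustClearTailCallFlags)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I))
          CI->setTailCall(false);  // no longer a valid tail call once inlined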
if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI); + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); } // If there are any alloca instructions in the block that used to be the entry diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index df6e603..e90c30b 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -190,14 +190,15 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { - BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(*UI)) + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(U)) UserBB = PN->getIncomingBlock(UI); if (InstBB != UserBB && !inLoop(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } - + // If there are no uses outside the loop, exit with no change. if (UsesToRewrite.empty()) return false; diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index d03f7a6..8e91138 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -35,111 +35,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Local analysis. -// - -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to Value::getUnderlyingObject but is located -/// here to avoid making VMCore depend on TargetData. -static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast<Operator>(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const TargetData *TD) { - uint64_t ByteOffset = 0; - Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); - - const Type *BaseType = 0; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. 
- BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { - // Global variables are safe to load from but their size cannot be - // guaranteed if they are overridden. - if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - if (BaseType && BaseType->isSized()) { - if (TD && BaseAlign == 0) - BaseAlign = TD->getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - if (!TD) - return true; // Loading directly from an alloca or global is OK. - - // Check if the load is within the bounds of the underlying object. - const PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); - if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && - (Align == 0 || (ByteOffset % Align) == 0)) - return true; - } - } - - // Otherwise, be a little bit aggressive by scanning the local block where we - // want to check to see if the pointer is already being loaded or stored - // from/to. If so, the previous load or store would have already trapped, - // so there is no harm doing an extra load (also, CSE will later eliminate - // the load entirely). - BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); - - while (BBI != E) { - --BBI; - - // If we see a free or a call which may write to memory (i.e. which might do - // a free) the pointer could be marked invalid. - if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && - !isa<DbgInfoIntrinsic>(BBI)) - return false; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->getOperand(0) == V) return true; - } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->getOperand(1) == V) return true; - } - } - return false; -} - - -//===----------------------------------------------------------------------===// // Local constant propagation. // @@ -411,7 +306,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { WeakVH BIHandle(BI); ReplaceAndSimplifyAllUses(Inst, V, TD); MadeChange = true; - if (BIHandle == 0) + if (BIHandle != BI) BI = BB->begin(); continue; } @@ -459,12 +354,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, // value into all of its uses. assert(PNV != PN && "hasConstantValue broken"); + Value *OldPhiIt = PhiIt; ReplaceAndSimplifyAllUses(PN, PNV, TD); // If recursive simplification ended up deleting the next PHI node we would // iterate to, then our iterator is invalid, restart scanning from the top // of the block. 
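Note: both handle checks in Local.cpp change for the same reason: BIHandle is a WeakVH and PhiIt behaves the same way, and such handles null themselves when their value is deleted but follow the replacement when the value is RAUW'd. Comparing the handle against the remembered instruction catches both situations, whereas the old '== 0' test only caught outright deletion. The pattern in isolation, with the mutateIR call purely hypothetical:

    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    void mutateIR(Instruction *I);   // hypothetical: may delete or RAUW I

    static void example(Instruction *I) {
      WeakVH Handle(I);
      mutateIR(I);
      if (Handle != I) {
        // I was deleted (Handle is now null) or replaced (Handle followed the
        // new value); either way restart iteration from a known-safe point.
      }
    }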
- if (PhiIt == 0) PhiIt = &BB->front(); + if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } } @@ -537,9 +433,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // Use that list to make another list of common predecessors of BB and Succ BlockSet CommonPreds; for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) - if (BBPreds.count(*PI)) - CommonPreds.insert(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (BBPreds.count(P)) + CommonPreds.insert(P); + } // Shortcut, if there are no common predecessors, merging is always safe if (CommonPreds.empty()) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 1ef3c32..4f4edf3 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -142,9 +142,11 @@ ReprocessLoop: if (*BB == L->getHeader()) continue; SmallPtrSet<BasicBlock *, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) - if (!L->contains(*PI)) - BadPreds.insert(*PI); + for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){ + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } // Delete each unique out-of-loop (and thus dead) predecessor. for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), @@ -192,7 +194,7 @@ ReprocessLoop: if (!Preheader) { Preheader = InsertPreheaderForLoop(L); if (Preheader) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -215,7 +217,7 @@ ReprocessLoop: // allowed. if (!L->contains(*PI)) { if (RewriteLoopExitBlock(L, ExitBlock)) { - NumInserted++; + ++NumInserted; Changed = true; } break; @@ -244,7 +246,7 @@ ReprocessLoop: // loop header. LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); if (LoopLatch) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -353,16 +355,18 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) - if (!L->contains(*PI)) { // Coming in from outside the loop? + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return 0; // Keep track of it. - OutsideBlocks.push_back(*PI); + OutsideBlocks.push_back(P); } + } // Split out the loop pre-header. BasicBlock *NewBB = @@ -385,13 +389,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { /// outside of the loop. BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { SmallVector<BasicBlock*, 8> LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) - if (L->contains(*I)) { + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + BasicBlock *P = *I; + if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. 
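Note: the recurring '*PI' to 'BasicBlock *P = *PI' rewrites in this file and in SimplifyCFG.cpp further down are one mechanical cleanup: dereference the predecessor iterator once per iteration and use the named block from then on. It reads better and keeps these loops insulated from how pred_iterator's dereference is implemented. The resulting loop shape, as a generic sketch:

    #include "llvm/Support/CFG.h"    // pred_iterator, pred_begin, pred_end
    using namespace llvm;

    static void visitPreds(BasicBlock *BB) {
      for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
        BasicBlock *Pred = *PI;      // dereference exactly once
        // ... use Pred, never *PI, in the body ...
      }
    }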
- if (isa<IndirectBrInst>((*I)->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return 0; - LoopBlocks.push_back(*I); + LoopBlocks.push_back(P); } + } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], @@ -559,10 +565,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set<BasicBlock*> BlocksInL; - for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) - if (DT->dominates(Header, *PI)) - AddBlockAndPredsToSet(*PI, Header, BlocksInL); - + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + AddBlockAndPredsToSet(P, Header, BlocksInL); + } // Scan all of the loop children of L, moving them to OuterLoop if they are // not part of the inner loop. @@ -610,8 +617,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; - for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) - if (*I != Preheader) BackedgeBlocks.push_back(*I); + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ + BasicBlock *P = *I; + if (P != Preheader) BackedgeBlocks.push_back(P); + } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 84fd1eb..e0e07e7 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -37,13 +37,13 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); /// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by ValueMap. +/// current values into those specified by VMap. static inline void RemapInstruction(Instruction *I, - DenseMap<const Value *, Value*> &ValueMap) { + ValueMap<const Value *, Value*> &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) + ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); + if (It != VMap.end()) I->setOperand(op, It->second); } } @@ -183,7 +183,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef DenseMap<const Value*, Value*> ValueToValueMapTy; + typedef ValueMap<const Value*, Value*> ValueToValueMapTy; ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { @@ -205,26 +205,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { - ValueToValueMapTy ValueMap; - BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." 
+ Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; - ValueMap[OrigPHINode[i]] = InVal; + VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; - for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 0ed8c72..2696e69 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -45,6 +45,7 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" @@ -62,10 +63,7 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", namespace { class LowerInvoke : public FunctionPass { // Used for both models. - Constant *WriteFn; Constant *AbortFn; - Value *AbortMessage; - unsigned AbortMessageLength; // Used for expensive EH support. const Type *JBLinkTy; @@ -92,10 +90,8 @@ namespace { } private: - void createAbortMessage(Module *M); - void writeAbortMessage(Instruction *IB); bool insertCheapEHSupport(Function &F); - void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes); + void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes); void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, AllocaInst *StackPtr, SwitchInst *CatchSwitch); @@ -123,7 +119,6 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - AbortMessage = 0; if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; @@ -175,68 +170,14 @@ bool LowerInvoke::doInitialization(Module &M) { // We need the 'write' and 'abort' functions for both models. AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), (Type *)0); -#if 0 // "write" is Unix-specific.. code is going away soon anyway. - WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, - VoidPtrTy, Type::Int32Ty, (Type *)0); -#else - WriteFn = 0; -#endif return true; } -void LowerInvoke::createAbortMessage(Module *M) { - if (useExpensiveEHSupport) { - // The abort message for expensive EH support tells the user that the - // program 'unwound' without an 'invoke' instruction. 
- Constant *Msg = - ConstantArray::get(M->getContext(), - "ERROR: Exception thrown, but not caught!\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector<Constant*> GEPIdx(2, - Constant::getNullValue(Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } else { - // The abort message for cheap EH support tells the user that EH is not - // enabled. - Constant *Msg = - ConstantArray::get(M->getContext(), - "Exception handler needed, but not enabled." - "Recompile program with -enable-correct-eh-support.\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector<Constant*> GEPIdx(2, Constant::getNullValue( - Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } -} - - -void LowerInvoke::writeAbortMessage(Instruction *IB) { -#if 0 - if (AbortMessage == 0) - createAbortMessage(IB->getParent()->getParent()->getParent()); - - // These are the arguments we WANT... - Value* Args[3]; - Args[0] = ConstantInt::get(Type::Int32Ty, 2); - Args[1] = AbortMessage; - Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength); - (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall(); -#endif -} - bool LowerInvoke::insertCheapEHSupport(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), @@ -257,9 +198,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { ++NumInvokes; Changed = true; } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - // Insert a new call to write(2, AbortMessage, AbortMessageLength); - writeAbortMessage(UI); - // Insert a call to abort() CallInst::Create(AbortFn, "", UI)->setTailCall(); @@ -320,7 +258,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); // Insert a normal call instruction. - std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), "", II); @@ -349,7 +287,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: -splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { +splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // First step, split all critical edges from invoke instructions. 
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -371,16 +309,33 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. @@ -402,7 +357,7 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { continue; // Avoid iterator invalidation by copying users to a temporary vector. 
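Note: that temporary vector, like the other containers swapped in this file, becomes a SmallVector: the common case stays in inline storage and only overflows to the heap past the chosen capacity. Where a container crosses a function boundary the parameter becomes SmallVectorImpl&, so the callee no longer bakes in a particular inline size. A sketch of the idiom with a hypothetical helper:

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void collectInvokes(Function &F, SmallVectorImpl<InvokeInst*> &Out) {
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
          Out.push_back(II);
    }

    // Caller chooses the inline capacity:
    //   SmallVector<InvokeInst*, 16> Invokes;
    //   collectInvokes(F, Invokes);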
- std::vector<Instruction*> Users; + SmallVector<Instruction*,16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -452,9 +407,9 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { } bool LowerInvoke::insertExpensiveEHSupport(Function &F) { - std::vector<ReturnInst*> Returns; - std::vector<UnwindInst*> Unwinds; - std::vector<InvokeInst*> Invokes; + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { @@ -502,12 +457,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); - std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); - Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1)); - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], "OldBuf", - EntryBB->getTerminator()); + EntryBB->getTerminator()); // Copy the JBListHead to the alloca. Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, @@ -552,7 +506,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { "setjmp.cont"); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], "TheJmpBuf", EntryBB->getTerminator()); JmpBufPtr = new BitCastInst(JmpBufPtr, @@ -605,24 +559,20 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create the block to do the longjmp. // Get a pointer to the jmpbuf and longjmp. - std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); - Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)); - Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; + Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf", UnwindBlock); Idx[0] = new BitCastInst(Idx[0], Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); + CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock); new UnreachableInst(F.getContext(), UnwindBlock); // Set up the term block ("throw without a catch"). 
new UnreachableInst(F.getContext(), TermBlock); - // Insert a new call to write(2, AbortMessage, AbortMessageLength); - writeAbortMessage(TermBlock->getTerminator()); - // Insert a call to abort() CallInst::Create(AbortFn, "", TermBlock->getTerminator())->setTailCall(); diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 13f0a28..c0de193 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -69,11 +69,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { // Only allow direct and non-volatile loads and stores... for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) // Loop over all of the uses of the alloca - if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. if (SI->isVolatile()) @@ -81,6 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { } else { return false; } + } return true; } @@ -603,9 +605,8 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. - SmallVector<BasicBlock*, 64> LiveInBlockWorklist; - LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), - Info.UsingBlocks.begin(), Info.UsingBlocks.end()); + SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, @@ -897,6 +898,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // Propagate any debug metadata from the store onto the dbg.value. if (MDNode *SIMD = SI->getMetadata("dbg")) DbgVal->setMetadata("dbg", SIMD); + // Otherwise propagate debug metadata from dbg.declare. + else if (MDNode *MD = DDI->getMetadata("dbg")) + DbgVal->setMetadata("dbg", MD); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9f2209d..27b07d9 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1377,8 +1377,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (Cond == 0) return false; - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) + return false; // Only allow this if the condition is a simple instruction that can be // executed unconditionally. It must be in the same block as the branch, and @@ -1387,11 +1388,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ignore dbg intrinsics. 
while(isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; - if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) { - return false; + + // Allow a single instruction to be hoisted in addition to the compare + // that feeds the branch. We later ensure that any values that _it_ uses + // were also live in the predecessor, so that we don't unnecessarily create + // register pressure or inhibit out-of-order execution. + Instruction *BonusInst = 0; + if (&*FrontIt != Cond && + FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && + FrontIt->isSafeToSpeculativelyExecute()) { + BonusInst = &*FrontIt; + ++FrontIt; } + // Only a single bonus inst is allowed. + if (&*FrontIt != Cond) + return false; + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. @@ -1429,6 +1442,44 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { !SafeToMergeTerminators(BI, PBI)) continue; + // Ensure that any values used in the bonus instruction are also used + // by the terminator of the predecessor. This means that those values + // must already have been resolved, so we won't be inhibiting the + // out-of-order core by speculating them earlier. + if (BonusInst) { + // Collect the values used by the bonus inst + SmallPtrSet<Value*, 4> UsedValues; + for (Instruction::op_iterator OI = BonusInst->op_begin(), + OE = BonusInst->op_end(); OI != OE; ++OI) { + Value* V = *OI; + if (!isa<Constant>(V)) + UsedValues.insert(V); + } + + SmallVector<std::pair<Value*, unsigned>, 4> Worklist; + Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); + + // Walk up to four levels back up the use-def chain of the predecessor's + // terminator to see if all those values were used. The choice of four + // levels is arbitrary, to provide a compile-time-cost bound. + while (!Worklist.empty()) { + std::pair<Value*, unsigned> Pair = Worklist.back(); + Worklist.pop_back(); + + if (Pair.second >= 4) continue; + UsedValues.erase(Pair.first); + if (UsedValues.empty()) break; + + if (Instruction* I = dyn_cast<Instruction>(Pair.first)) { + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); + } + } + + if (!UsedValues.empty()) return false; + } + Instruction::BinaryOps Opc; bool InvertPredCond = false; @@ -1457,9 +1508,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PBI->setSuccessor(1, OldTrue); } + // If we have a bonus inst, clone it into the predecessor block. + Instruction *NewBonus = 0; + if (BonusInst) { + NewBonus = BonusInst->clone(); + PredBlock->getInstList().insert(PBI, NewBonus); + NewBonus->takeName(BonusInst); + BonusInst->setName(BonusInst->getName()+".old"); + } + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); + if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus); PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); @@ -1513,17 +1574,19 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. 
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) && + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && PBI->isConditional() && PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), - CondIsTrue), *PI); + CondIsTrue), P); } else { - NewPN->addIncoming(BI->getCondition(), *PI); + NewPN->addIncoming(BI->getCondition(), P); } + } BI->setCondition(NewPN); return true; @@ -1697,10 +1760,11 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - TerminatorInst *PTI = (*PI)->getTerminator(); + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { if (BI->isUnconditional()) - UncondBranchPreds.push_back(*PI); + UncondBranchPreds.push_back(P); else CondBranchPreds.push_back(BI); } diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 87ce631..3f6a90c 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -28,7 +28,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { // DenseMap. This includes any recursive calls to MapValue. // Global values and non-function-local metadata do not need to be seeded into - // the ValueMap if they are using the identity mapping. + // the VM if they are using the identity mapping. if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) || (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal())) return VMSlot = const_cast<Value*>(V); @@ -45,7 +45,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) || - isa<UndefValue>(C) || isa<MDString>(C)) + isa<UndefValue>(C)) return VMSlot = C; // Primitive constants map directly if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { @@ -125,11 +125,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { } /// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by ValueMap. +/// current values into those specified by VMap. 
/// -void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) { +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, ValueMap); + Value *V = MapValue(*op, VMap); assert(V && "Referenced value not in value map!"); *op = V; } } diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.h b/contrib/llvm/lib/Transforms/Utils/ValueMapper.h index d61c24c..f4ff643 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.h +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.h @@ -15,12 +15,12 @@ #ifndef VALUEMAPPER_H #define VALUEMAPPER_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ValueMap.h" namespace llvm { class Value; class Instruction; - typedef DenseMap<const Value *, Value *> ValueToValueMapTy; + typedef ValueMap<const Value *, Value *> ValueToValueMapTy; Value *MapValue(const Value *V, ValueToValueMapTy &VM); void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);