Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/Hello.cpp                  |  10
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt               |   1
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp            |   5
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                |   2
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                | 505
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp         | 118
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp             |   2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp        |  42
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                   | 481
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp  | 218
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp          |  63
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp           |   1
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                  |   6
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp        |  28
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp         |  14
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp       |   4
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp       |  39
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp             |  73
18 files changed, 538 insertions(+), 1074 deletions(-)
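
Note: the common thread across the hunks below is the removal of the first-class MallocInst IR in favor of plain calls to malloc, created with CallInst::CreateMalloc and recognized by the helpers that appear throughout the diff (isMalloc, extractMallocCall, extractMallocCallFromBitCast, getMallocAllocatedType, getMallocArraySize). A minimal sketch of the new idiom, not code from this commit; it assumes the LLVM 2.6-era APIs exactly as used in the hunks, and the header name for the malloc helpers is an assumption:

    // Sketch only, under the assumptions stated above.
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Analysis/MallocHelper.h"  // assumed home of the malloc helpers
    using namespace llvm;

    // Emit malloc(sizeof(ElemTy) * NElems) before InsertPt, bitcast to ElemTy*,
    // mirroring the CallInst::CreateMalloc calls in GlobalOpt.cpp below.
    static Value *emitMalloc(Instruction *InsertPt, const Type *ElemTy,
                             Value *NElems, TargetData *TD, LLVMContext &Ctx) {
      return CallInst::CreateMalloc(InsertPt, TD->getIntPtrType(Ctx),
                                    ElemTy, NElems, "newmalloc");
    }

    // Recognize a malloc call (possibly through a bitcast) whose array size is
    // computable -- the replacement for the old isa<MallocInst> checks.
    static bool isRecognizedMalloc(Value *V, LLVMContext &Ctx, TargetData *TD) {
      if (CallInst *CI = extractMallocCall(V))
        return getMallocArraySize(CI, Ctx, TD) != 0;
      return false;
    }

The guard on getMallocArraySize reflects the pattern the diff adds in GlobalOpt.cpp: transformations bail out when the array size of the malloc cannot be determined.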
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 8000d0d..91534a7 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -30,9 +30,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } }; @@ -49,9 +48,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index ec0f1e1..5c28801 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMipo GlobalOpt.cpp IPConstantPropagation.cpp IPO.cpp - IndMemRemoval.cpp InlineAlways.cpp InlineSimple.cpp Inliner.cpp diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 7edaa7f..0701b94 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -153,7 +153,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Writes memory. Just give up. return false; - if (isa<MallocInst>(I)) + if (isMalloc(I)) // malloc claims not to write memory! PR3754. return false; @@ -267,11 +267,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, // Check whether the pointer came from an allocation. case Instruction::Alloca: - case Instruction::Malloc: break; case Instruction::Call: - if (isMalloc(RVI)) - break; case Instruction::Invoke: { CallSite CS(RVI); if (CS.paramHasAttr(0, Attribute::NoAlias)) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 09f9e7c..8f4e8b3 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -149,8 +149,6 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); - // Remove dead metadata. - Changed |= M.getContext().RemoveDeadMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a44386e..9ced2e8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -822,140 +822,18 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - MallocInst *MI, - LLVMContext &Context) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI); - ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); - - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. 
- Type *NewTy = ArrayType::get(MI->getAllocatedType(), - NElements->getZExtValue()); - MallocInst *NewMI = - new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), - MI->getAlignment(), MI->getName(), MI); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", MI); - MI->replaceAllUsesWith(NewGEP); - MI->eraseFromParent(); - MI = NewMI; - } - - // Create the new global variable. The contents of the malloc'd memory is - // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. - Constant *Init = UndefValue::get(MI->getAllocatedType()); - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MI->getAllocatedType(), false, - GlobalValue::InternalLinkage, Init, - GV->getName()+".body", - GV, - GV->isThreadLocal()); - - // Anything that used the malloc now uses the global directly. - MI->replaceAllUsesWith(NewGV); - - Constant *RepValue = NewGV; - if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, - GV->getType()->getElementType()); - - // If there is a comparison against null, we will insert a global bool to - // keep track of whether the global was initialized yet or not. - GlobalVariable *InitBool = - new GlobalVariable(Context, Type::getInt1Ty(Context), false, - GlobalValue::InternalLinkage, - ConstantInt::getFalse(Context), GV->getName()+".init", - GV->isThreadLocal()); - bool InitBoolUsed = false; - - // Loop over all uses of GV, processing them in turn. - std::vector<StoreInst*> Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); - InitBoolUsed = true; - switch (CI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(Context); // X < null -> always false - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", CI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - CI->replaceAllUsesWith(LV); - CI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast<StoreInst>(GV->use_back()); - // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); - SI->eraseFromParent(); - } - - // If the initialization boolean was used, insert it, otherwise delete it. - if (!InitBoolUsed) { - while (!InitBool->use_empty()) // Delete initializations - cast<Instruction>(InitBool->use_back())->eraseFromParent(); - delete InitBool; - } else - GV->getParent()->getGlobalList().insert(GV, InitBool); - - - // Now the GV is dead, nuke it and the malloc. 
- GV->eraseFromParent(); - MI->eraseFromParent(); - - // To further other optimizations, loop over all users of NewGV and try to - // constant prop them. This will promote GEP instructions with constant - // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, Context); - if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, Context); - - return NewGV; -} - -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, LLVMContext &Context, TargetData* TD) { + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV + << " CALL = " << *CI << " BCI = " << *BCI << '\n'); + const Type *IntPtrTy = TD->getIntPtrType(Context); - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI); - - ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, - Context, TD)); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); + ConstantInt *NElements = cast<ConstantInt>(ArraySize); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. @@ -976,9 +854,6 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. const Type *MAT = getMallocAllocatedType(CI); Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), @@ -1398,185 +1273,6 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, - LLVMContext &Context){ - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI); - const StructType *STy = cast<StructType>(MI->getAllocatedType()); - - // There is guaranteed to be at least one use of the malloc (storing - // it into GV). If there are other uses, change them to be uses of - // the global to simplify later code. This also deletes the store - // into GV. - ReplaceUsesOfMallocWithGlobal(MI, GV); - - // Okay, at this point, there are no users of the malloc. Insert N - // new mallocs at the same place as MI, and N globals. 
- std::vector<Value*> FieldGlobals; - std::vector<MallocInst*> FieldMallocs; - - for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const Type *PFieldTy = PointerType::getUnqual(FieldTy); - - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->isThreadLocal()); - FieldGlobals.push_back(NGV); - - MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), - MI->getName() + ".f" + Twine(FieldNo), MI); - FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, MI); - } - - // The tricky aspect of this transformation is handling the case when malloc - // fails. In the original code, malloc failing would set the result pointer - // of malloc to null. In this case, some mallocs could succeed and others - // could fail. As such, we emit code that looks like this: - // F0 = malloc(field0) - // F1 = malloc(field1) - // F2 = malloc(field2) - // if (F0 == 0 || F1 == 0 || F2 == 0) { - // if (F0) { free(F0); F0 = 0; } - // if (F1) { free(F1); F1 = 0; } - // if (F2) { free(F2); F2 = 0; } - // } - Value *RunningOr = 0; - for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); - if (!RunningOr) - RunningOr = Cond; // First seteq - else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI); - } - - // Split the basic block at the old malloc. - BasicBlock *OrigBB = MI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); - - // Create the block to check the first condition. Put all these blocks at the - // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", - OrigBB->getParent()); - - // Remove the uncond branch from OrigBB to ContBB, turning it into a cond - // branch on RunningOr. - OrigBB->getTerminator()->eraseFromParent(); - BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - - // Within the NullPtrBlock, we need to emit a comparison and branch for each - // pointer, because some may be null while others are not. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); - BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", - OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Context, "next", - OrigBB->getParent()); - BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); - - // Fill in FreeBlock. - new FreeInst(GVVal, FreeBlock); - new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], - FreeBlock); - BranchInst::Create(NextBlock, FreeBlock); - - NullPtrBlock = NextBlock; - } - - BranchInst::Create(ContBB, NullPtrBlock); - - // MI is no longer needed, remove it. - MI->eraseFromParent(); - - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. 
- DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; - InsertedScalarizedValues[GV] = FieldGlobals; - - std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - - // Okay, the malloc site is completely handled. All of the uses of GV are now - // loads, and all uses of those loads are simple. Rewrite them to use loads - // of the per-field globals instead. - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, - Context); - continue; - } - - // Must be a store of null. - StoreInst *SI = cast<StoreInst>(User); - assert(isa<ConstantPointerNull>(SI->getOperand(0)) && - "Unexpected heap-sra user!"); - - // Insert a store of null into each global. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); - Constant *Null = Constant::getNullValue(PT->getElementType()); - new StoreInst(Null, FieldGlobals[i], SI); - } - // Erase the original store. - SI->eraseFromParent(); - } - - // While we have PHIs that are interesting to rewrite, do it. - while (!PHIsToRewrite.empty()) { - PHINode *PN = PHIsToRewrite.back().first; - unsigned FieldNo = PHIsToRewrite.back().second; - PHIsToRewrite.pop_back(); - PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); - assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); - - // Add all the incoming values. This can materialize more phis. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *InVal = PN->getIncomingValue(i); - InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context); - FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); - } - } - - // Drop all inter-phi links and any loads that made it this far. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->dropAllReferences(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->dropAllReferences(); - } - - // Delete all the phis and loads now that inter-references are dead. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->eraseFromParent(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->eraseFromParent(); - } - - // The old global is now dead, remove it. - GV->eraseFromParent(); - - ++NumHeapSRA; - return cast<GlobalVariable>(FieldGlobals[0]); -} - /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, @@ -1587,6 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, << " BITCAST = " << *BCI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); // There is guaranteed to be at least one use of the malloc (storing // it into GV). 
If there are other uses, change them to be uses of @@ -1611,8 +1309,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, - getMallocArraySize(CI, Context, TD), + Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + FieldTy, ArraySize, BCI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, BCI); @@ -1766,95 +1464,6 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - MallocInst *MI, - Module::global_iterator &GVI, - TargetData *TD, - LLVMContext &Context) { - // If this is a malloc of an abstract type, don't touch it. - if (!MI->getAllocatedType()->isSized()) - return false; - - // We can't optimize this global unless all uses of it are *known* to be - // of the malloc value, not of the null initializer value (consider a use - // that compares the global's value against zero to see if the malloc has - // been reached). To do this, we check to see if all uses of the global - // would trap if the global were null: this proves that they must all - // happen after the malloc. - if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) - return false; - - // We can't optimize this if the malloc itself is used in a complex way, - // for example, being stored into multiple globals. This allows the - // malloc to be stored into the specified global, loaded setcc'd, and - // GEP'd. These are all things we could transform to using the global - // for. - { - SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs)) - return false; - } - - - // If we have a global that is only initialized with a fixed size malloc, - // transform the program to use global memory instead of malloc'd memory. - // This eliminates dynamic allocation, avoids an indirection accessing the - // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue()* - TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - const Type *AllocTy = MI->getAllocatedType(); - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!MI->isArrayAllocation()) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. 
malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { - MallocInst *NewMI = - new MallocInst(AllocSTy, - ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()), - "", MI); - NewMI->takeName(MI); - Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); - MI->replaceAllUsesWith(Cast); - MI->eraseFromParent(); - MI = NewMI; - } - - GVI = PerformHeapAllocSRoA(GV, MI, Context); - return true; - } - } - - return false; -} - -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, Module::global_iterator &GVI, @@ -1892,52 +1501,55 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = - dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!isArrayMalloc(CI, Context, TD)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()); - Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + Value *NElems = getMallocArraySize(CI, Context, TD); + // We cannot optimize the malloc if we cannot determine malloc array size. + if (NElems) { + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. 
+ if (TD && + NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + return true; } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (!isArrayMalloc(CI, Context, TD)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); + + if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = + dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()); + Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + AllocSTy, NumElements, + BCI->getName()); + Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); + BCI->replaceAllUsesWith(Cast); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast<BitCastInst>(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); - return true; + GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); + return true; + } } } @@ -1966,9 +1578,6 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) return true; - } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { - if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) - return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { if (getMallocAllocatedType(CI)) { BitCastInst* BCI = NULL; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 4c1f26d..deb4405 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -1,4 +1,4 @@ -//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===// +//===- RaiseAllocations.cpp - Convert @free calls to insts ------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the RaiseAllocations pass which convert malloc and free -// calls to malloc and free instructions. +// This file defines the RaiseAllocations pass which convert free calls to free +// instructions. // //===----------------------------------------------------------------------===// @@ -29,19 +29,19 @@ using namespace llvm; STATISTIC(NumRaised, "Number of allocations raised"); namespace { - // RaiseAllocations - Turn @malloc and @free calls into the appropriate + // RaiseAllocations - Turn @free calls into the appropriate // instruction. 
// class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass { - Function *MallocFunc; // Functions in the module we are processing - Function *FreeFunc; // Initialized by doPassInitializationVirt + Function *FreeFunc; // Functions in the module we are processing + // Initialized by doPassInitializationVirt public: static char ID; // Pass identification, replacement for typeid RaiseAllocations() - : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {} + : ModulePass(&ID), FreeFunc(0) {} // doPassInitialization - For the raise allocations pass, this finds a - // declaration for malloc and free if they exist. + // declaration for free if it exists. // void doInitialization(Module &M); @@ -61,50 +61,16 @@ ModulePass *llvm::createRaiseAllocationsPass() { } -// If the module has a symbol table, they might be referring to the malloc and -// free functions. If this is the case, grab the method pointers that the -// module is using. +// If the module has a symbol table, they might be referring to the free +// function. If this is the case, grab the method pointers that the module is +// using. // -// Lookup @malloc and @free in the symbol table, for later use. If they don't +// Lookup @free in the symbol table, for later use. If they don't // exist, or are not external, we do not worry about converting calls to that // function into the appropriate instruction. // void RaiseAllocations::doInitialization(Module &M) { - // Get Malloc and free prototypes if they exist! - MallocFunc = M.getFunction("malloc"); - if (MallocFunc) { - const FunctionType* TyWeHave = MallocFunc->getFunctionType(); - - // Get the expected prototype for malloc - const FunctionType *Malloc1Type = - FunctionType::get(Type::getInt8PtrTy(M.getContext()), - std::vector<const Type*>(1, - Type::getInt64Ty(M.getContext())), false); - - // Chck to see if we got the expected malloc - if (TyWeHave != Malloc1Type) { - // Check to see if the prototype is wrong, giving us i8*(i32) * malloc - // This handles the common declaration of: 'void *malloc(unsigned);' - const FunctionType *Malloc2Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - std::vector<const Type*>(1, - Type::getInt32Ty(M.getContext())), false); - if (TyWeHave != Malloc2Type) { - // Check to see if the prototype is missing, giving us - // i8*(...) * malloc - // This handles the common declaration of: 'void *malloc();' - const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - true); - if (TyWeHave != Malloc3Type) - // Give up - MallocFunc = 0; - } - } - } - + // Get free prototype if it exists! FreeFunc = M.getFunction("free"); if (FreeFunc) { const FunctionType* TyWeHave = FreeFunc->getFunctionType(); @@ -138,72 +104,18 @@ void RaiseAllocations::doInitialization(Module &M) { } // Don't mess with locally defined versions of these functions... - if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0; if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0; } // run - Transform calls into instructions... // bool RaiseAllocations::runOnModule(Module &M) { - // Find the malloc/free prototypes... + // Find the free prototype... doInitialization(M); bool Changed = false; - // First, process all of the malloc calls... 
- if (MallocFunc) { - std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end()); - std::vector<Value*> EqPointers; // Values equal to MallocFunc - while (!Users.empty()) { - User *U = Users.back(); - Users.pop_back(); - - if (Instruction *I = dyn_cast<Instruction>(U)) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && !CS.arg_empty() && - (CS.getCalledFunction() == MallocFunc || - std::find(EqPointers.begin(), EqPointers.end(), - CS.getCalledValue()) != EqPointers.end())) { - - Value *Source = *CS.arg_begin(); - - // If no prototype was provided for malloc, we may need to cast the - // source size. - if (Source->getType() != Type::getInt32Ty(M.getContext())) - Source = - CastInst::CreateIntegerCast(Source, - Type::getInt32Ty(M.getContext()), - false/*ZExt*/, - "MallocAmtCast", I); - - MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), - Source, "", I); - MI->takeName(I); - I->replaceAllUsesWith(MI); - - // If the old instruction was an invoke, add an unconditional branch - // before the invoke, which will become the new terminator. - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - BranchInst::Create(II->getNormalDest(), I); - - // Delete the old call site - I->eraseFromParent(); - Changed = true; - ++NumRaised; - } - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) { - Users.insert(Users.end(), GV->use_begin(), GV->use_end()); - EqPointers.push_back(GV); - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - if (CE->isCast()) { - Users.insert(Users.end(), CE->use_begin(), CE->use_end()); - EqPointers.push_back(CE); - } - } - } - } - - // Next, process all free calls... + // Process all free calls... if (FreeFunc) { std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end()); std::vector<Value*> EqPointers; // Values equal to FreeFunc diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 77d44b2..57aaf43 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -250,8 +250,6 @@ static bool StripDebugInfo(Module &M) { if (NMD) NMD->eraseFromParent(); - // Remove dead metadata. - M.getContext().RemoveDeadMetadata(); return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a3e3fea..42209b8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -73,6 +73,7 @@ namespace { DenseMap<Value*,Value*> &SunkAddrs); bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, DenseMap<Value*,Value*> &SunkAddrs); + bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); void findLoopBackEdges(const Function &F); }; @@ -731,6 +732,43 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, return MadeChange; } +/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same +/// basic block as the load, unless conditions are unfavorable. This allows +/// SelectionDAG to fold the extend into the load. +/// +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { + // Look for a load being extended. + LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); + if (!LI) return false; + + // If they're already in the same block, there's nothing to do. + if (LI->getParent() == I->getParent()) + return false; + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. 
+ if (!LI->hasOneUse() && + TLI && !TLI->isTruncateFree(I->getType(), LI->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + return false; + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + I->removeFromParent(); + I->insertAfter(LI); + return true; +} + bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); @@ -846,8 +884,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { MadeChange |= Change; } - if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) + if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) { + MadeChange |= MoveExtToFormExtLoad(I); MadeChange |= OptimizeExtUses(I); + } } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { MadeChange |= OptimizeCmpExpression(CI); } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 2ed4a63..8859324 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -78,13 +78,10 @@ namespace { SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - EMPTY, TOMBSTONE }; + INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; ExpressionOpcode opcode; const Type* type; - uint32_t firstVN; - uint32_t secondVN; - uint32_t thirdVN; SmallVector<uint32_t, 4> varargs; Value *function; @@ -100,12 +97,6 @@ namespace { return false; else if (function != other.function) return false; - else if (firstVN != other.firstVN) - return false; - else if (secondVN != other.secondVN) - return false; - else if (thirdVN != other.thirdVN) - return false; else { if (varargs.size() != other.varargs.size()) return false; @@ -146,6 +137,10 @@ namespace { Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); Expression create_expression(Constant* C); + Expression create_expression(ExtractValueInst* C); + Expression create_expression(InsertValueInst* C); + + uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } uint32_t lookup_or_add(Value *V); @@ -176,13 +171,8 @@ template <> struct DenseMapInfo<Expression> { static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - hash = e.firstVN + hash * 37; - hash = e.secondVN + hash * 37; - hash = e.thirdVN + hash * 37; - hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)) + - hash * 37; + (unsigned)((uintptr_t)e.type >> 9)); for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) @@ -290,9 +280,6 @@ Expression ValueTable::create_expression(CallInst* C) { Expression e; e.type = C->getType(); - e.firstVN = 0; - e.secondVN = 0; - e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; @@ -305,10 +292,8 @@ Expression ValueTable::create_expression(CallInst* C) { Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - - e.firstVN = lookup_or_add(BO->getOperand(0)); - e.secondVN = lookup_or_add(BO->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(BO->getOperand(0))); + e.varargs.push_back(lookup_or_add(BO->getOperand(1))); e.function = 0; e.type = BO->getType(); e.opcode = 
getOpcode(BO); @@ -319,9 +304,8 @@ Expression ValueTable::create_expression(BinaryOperator* BO) { Expression ValueTable::create_expression(CmpInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = lookup_or_add(C->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); + e.varargs.push_back(lookup_or_add(C->getOperand(1))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -332,9 +316,7 @@ Expression ValueTable::create_expression(CmpInst* C) { Expression ValueTable::create_expression(CastInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -345,9 +327,9 @@ Expression ValueTable::create_expression(CastInst* C) { Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - e.firstVN = lookup_or_add(S->getOperand(0)); - e.secondVN = lookup_or_add(S->getOperand(1)); - e.thirdVN = lookup_or_add(S->getOperand(2)); + e.varargs.push_back(lookup_or_add(S->getOperand(0))); + e.varargs.push_back(lookup_or_add(S->getOperand(1))); + e.varargs.push_back(lookup_or_add(S->getOperand(2))); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; @@ -358,9 +340,8 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - e.firstVN = lookup_or_add(E->getOperand(0)); - e.secondVN = lookup_or_add(E->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(E->getOperand(0))); + e.varargs.push_back(lookup_or_add(E->getOperand(1))); e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; @@ -371,9 +352,9 @@ Expression ValueTable::create_expression(ExtractElementInst* E) { Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getOperand(0)); - e.secondVN = lookup_or_add(I->getOperand(1)); - e.thirdVN = lookup_or_add(I->getOperand(2)); + e.varargs.push_back(lookup_or_add(I->getOperand(0))); + e.varargs.push_back(lookup_or_add(I->getOperand(1))); + e.varargs.push_back(lookup_or_add(I->getOperand(2))); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; @@ -384,9 +365,9 @@ Expression ValueTable::create_expression(InsertElementInst* I) { Expression ValueTable::create_expression(SelectInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getCondition()); - e.secondVN = lookup_or_add(I->getTrueValue()); - e.thirdVN = lookup_or_add(I->getFalseValue()); + e.varargs.push_back(lookup_or_add(I->getCondition())); + e.varargs.push_back(lookup_or_add(I->getTrueValue())); + e.varargs.push_back(lookup_or_add(I->getFalseValue())); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; @@ -397,9 +378,7 @@ Expression ValueTable::create_expression(SelectInst* I) { Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - e.firstVN = lookup_or_add(G->getPointerOperand()); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(G->getPointerOperand())); e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; @@ -411,6 +390,35 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { return e; } +Expression ValueTable::create_expression(ExtractValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + for (ExtractValueInst::idx_iterator II = 
E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACTVALUE; + + return e; +} + +Expression ValueTable::create_expression(InsertValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::INSERTVALUE; + + return e; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -420,232 +428,197 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -/// lookup_or_add - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value *V) { - DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); - if (VI != valueNumbering.end()) - return VI->second; - - if (CallInst* C = dyn_cast<CallInst>(V)) { - if (AA->doesNotAccessMemory(C)) { - Expression e = create_expression(C); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (AA->onlyReadsMemory(C)) { - Expression e = create_expression(C); - - if (expressionNumbering.find(e) == expressionNumbering.end()) { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - MemDepResult local_dep = MD->getDependency(C); - - if (!local_dep.isDef() && !local_dep.isNonLocal()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - if (local_dep.isDef()) { - CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - - if (local_cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); - if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - } - - uint32_t v = lookup_or_add(local_cdep); - valueNumbering.insert(std::make_pair(V, v)); - return v; - } - - // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! - CallInst* cdep = 0; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. 
- if (I->second.isNonLocal()) - continue; +uint32_t ValueTable::lookup_or_add_call(CallInst* C) { + if (AA->doesNotAccessMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } else if (AA->onlyReadsMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } - // We don't handle non-depedencies. If we already have a call, reject - // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { - cdep = 0; - break; - } + MemDepResult local_dep = MD->getDependency(C); - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ - cdep = NonLocalDepCall; - continue; - } + if (!local_dep.isDef() && !local_dep.isNonLocal()) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } - cdep = 0; - break; - } + if (local_dep.isDef()) { + CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - if (!cdep) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (local_cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } } - uint32_t v = lookup_or_add(cdep); - valueNumbering.insert(std::make_pair(V, v)); + uint32_t v = lookup_or_add(local_cdep); + valueNumbering[C] = v; return v; - - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; } - } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { - Expression e = create_expression(BO); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + // Non-local case. + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(CallSite(C)); + // FIXME: call/call dependencies for readonly calls should return def, not + // clobber! Move the checking logic to MemDep! + CallInst* cdep = 0; + + // Check to see if we have a single dominating call instruction that is + // identical to C. + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + // Ignore non-local dependencies. + if (I->second.isNonLocal()) + continue; - return nextValueNumber++; - } - } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { - Expression e = create_expression(C); + // We don't handle non-depedencies. If we already have a call, reject + // instruction dependencies. 
+ if (I->second.isClobber() || cdep != 0) { + cdep = 0; + break; + } - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + // FIXME: All duplicated with non-local case. + if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + cdep = NonLocalDepCall; + continue; + } - return nextValueNumber++; + cdep = 0; + break; } - } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!cdep) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } } - } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { - Expression e = create_expression(U); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + uint32_t v = lookup_or_add(cdep); + valueNumbering[C] = v; + return v; - return nextValueNumber++; - } - } else if (CastInst* U = dyn_cast<CastInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (GetElementPtrInst* U = 
dyn_cast<GetElementPtrInst>(V)) { - Expression e = create_expression(U); + } else { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } +} - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. +uint32_t ValueTable::lookup_or_add(Value *V) { + DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; - return nextValueNumber++; - } - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!isa<Instruction>(V)) { + valueNumbering[V] = nextValueNumber; return nextValueNumber++; } + + Instruction* I = cast<Instruction>(V); + Expression exp; + switch (I->getOpcode()) { + case Instruction::Call: + return lookup_or_add_call(cast<CallInst>(I)); + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or : + case Instruction::Xor: + exp = create_expression(cast<BinaryOperator>(I)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + exp = create_expression(cast<CmpInst>(I)); + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + exp = create_expression(cast<CastInst>(I)); + break; + case Instruction::Select: + exp = create_expression(cast<SelectInst>(I)); + break; + case Instruction::ExtractElement: + exp = create_expression(cast<ExtractElementInst>(I)); + break; + case Instruction::InsertElement: + exp = create_expression(cast<InsertElementInst>(I)); + break; + case Instruction::ShuffleVector: + exp = create_expression(cast<ShuffleVectorInst>(I)); + break; + case Instruction::ExtractValue: + exp = create_expression(cast<ExtractValueInst>(I)); + break; + case Instruction::InsertValue: + exp = create_expression(cast<InsertValueInst>(I)); + break; + case Instruction::GetElementPtr: + exp = create_expression(cast<GetElementPtrInst>(I)); + break; + default: + valueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[V] = e; + return e; } /// lookup - Returns the value number of the specified value. Fails if @@ -1557,15 +1530,18 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // actually have the same type. See if we know how to reuse the stored // value (depending on its type). 
const TargetData *TD = 0; - if (StoredVal->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), - L, *TD); - if (StoredVal == 0) + if (StoredVal->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + else return false; - - DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal - << '\n' << *L << "\n\n\n"); } // Remove it! @@ -1584,14 +1560,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // the same type. See if we know how to reuse the previously loaded value // (depending on its type). const TargetData *TD = 0; - if (DepLI->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); - if (AvailableVal == 0) - return false; + if (DepLI->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; - DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal - << "\n" << *L << "\n\n\n"); + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } + else + return false; } // Remove it! diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index f635af3..b41b5d4 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -6300,25 +6300,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } - case Instruction::Malloc: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - break; case Instruction::Call: // If we have (malloc != null), and if the malloc has a single use, we // can assume it is successful and remove the malloc. if (isMalloc(LHSI) && LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, + // Need to explicitly erase malloc call here, instead of adding it to + // Worklist, because it won't get DCE'd from the Worklist since + // isInstructionTriviallyDead() returns false for function calls. + // It is OK to replace LHSI/MallocCall with Undef because the + // instruction that uses it will be erased via Worklist. + if (extractMallocCall(LHSI)) { + LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); + EraseInstFromFunction(*LHSI); + return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), !I.isTrueWhenEqual())); + } + if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) + if (MallocCall->hasOneUse()) { + MallocCall->replaceAllUsesWith( + UndefValue::get(MallocCall->getType())); + EraseInstFromFunction(*MallocCall); + Worklist.Add(LHSI); // The malloc's bitcast use. 
+            return ReplaceInstUsesWith(I,
+                                 ConstantInt::get(Type::getInt1Ty(*Context),
+                                                  !I.isTrueWhenEqual()));
+          }
       }
       break;
     }
@@ -7809,11 +7817,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
   }
 
-  AllocationInst *New;
-  if (isa<MallocInst>(AI))
-    New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
-  else
-    New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+  AllocationInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
   New->setAlignment(AI.getAlignment());
   New->takeName(&AI);
@@ -9270,14 +9274,44 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
   return Changed ? &SI : 0;
 }
 
-/// isDefinedInBB - Return true if the value is an instruction defined in the
-/// specified basicblock.
-static bool isDefinedInBB(const Value *V, const BasicBlock *BB) {
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is
+/// a PHI node (but the two may be in different blocks).  See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI.  For
+/// example, cases like this cannot be mapped:
+///
+///   X = phi [ C1, BB1], [C2, BB2]
+///   Y = add
+///   Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+                                                   const SelectInst &SI) {
+  // If the value is a non-instruction value like a constant or argument, it
+  // can always be mapped.
   const Instruction *I = dyn_cast<Instruction>(V);
-  return I != 0 && I->getParent() == BB;
+  if (I == 0) return true;
+
+  // If V is a PHI node defined in the same block as the condition PHI, we can
+  // map the arguments.
+  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+  if (const PHINode *VP = dyn_cast<PHINode>(I))
+    if (VP->getParent() == CondPHI->getParent())
+      return true;
+
+  // Otherwise, if the PHI and select are defined in the same block and if V is
+  // defined in a different block, then we can transform it.
+  if (SI.getParent() == CondPHI->getParent() &&
+      I->getParent() != CondPHI->getParent())
+    return true;
+
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator based analysis, punt.
+  return false;
 }
-
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -9489,16 +9523,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return FoldI;
   }
 
-  // See if we can fold the select into a phi node.  The true/false values have
-  // to be live in the predecessor blocks.  If they are instructions in SI's
-  // block, we can't map to the predecessor.
-  if (isa<PHINode>(SI.getCondition()) &&
-      (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) ||
-       isa<PHINode>(SI.getTrueValue())) &&
-      (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) ||
-       isa<PHINode>(SI.getFalseValue())))
-    if (Instruction *NV = FoldOpIntoPhi(SI))
-      return NV;
+  // See if we can fold the select into a phi node if the condition is a phi node.
+  if (isa<PHINode>(SI.getCondition()))
+    // The true/false values have to be live in the PHI predecessors' blocks.
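The predicate just introduced encodes three legal cases; restated in isolation with placeholder types (Block/Instr are hypothetical stand-ins, not the LLVM classes):

struct Block { };
struct Instr { const Block *Parent; bool IsPHI; };

bool canMapIntoPredBlock(const Instr *V,          // null: constant or argument
                         const Block *CondPHIBB,  // block of the condition PHI
                         const Block *SelectBB) { // block of the select
  if (V == 0) return true;                        // live everywhere
  if (V->IsPHI && V->Parent == CondPHIBB)
    return true;                                  // PHI alongside the condition
  if (SelectBB == CondPHIBB && V->Parent != CondPHIBB)
    return true;                                  // defined outside the block
  return false;   // the 'hard' case: would need dominator analysis
}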
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); @@ -11213,15 +11244,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { const Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocationInst *New = 0; - - // Create and insert the replacement instruction... - if (isa<MallocInst>(AI)) - New = Builder->CreateMalloc(NewTy, 0, AI.getName()); - else { - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - } + assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); + AllocationInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -11294,12 +11318,6 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { } } - // Change free(malloc) into nothing, if the malloc has a single use. - if (MallocInst *MI = dyn_cast<MallocInst>(Op)) - if (MI->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*MI); - } if (isMalloc(Op)) { if (CallInst* CI = extractMallocCallFromBitCast(Op)) { if (Op->hasOneUse() && CI->hasOneUse()) { @@ -11327,40 +11345,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Value *CastOp = CI->getOperand(0); LLVMContext *Context = IC.getContext(); - if (TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { - // Instead of loading constant c string, use corresponding integer value - // directly if string length is small enough. - std::string Str; - if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { - unsigned len = Str.length(); - const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); - unsigned numBits = Ty->getPrimitiveSizeInBits(); - // Replace LI with immediate integer store. - if ((numBits >> 3) == len + 1) { - APInt StrVal(numBits, 0); - APInt SingleChar(numBits, 0); - if (TD->isLittleEndian()) { - for (signed i = len-1; i >= 0; i--) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - } else { - for (unsigned i = 0; i < len; i++) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - // Append NULL at the end. 
- SingleChar = 0; - StrVal = (StrVal << 8) | SingleChar; - } - Value *NL = ConstantInt::get(*Context, StrVal); - return IC.ReplaceInstUsesWith(LI, NL); - } - } - } - } - const PointerType *DestTy = cast<PointerType>(CI->getType()); const Type *DestPTy = DestTy->getElementType(); if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { @@ -11380,7 +11364,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[1] = Idxs[0]; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); @@ -11436,6 +11421,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) return ReplaceInstUsesWith(LI, AvailableVal); + // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. @@ -11450,60 +11436,24 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } - if (Constant *C = dyn_cast<Constant>(Op)) { - // load null/undef -> undef - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(C) || - (C->isNullValue() && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - // Instcombine load (constant global) into the value loaded. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op)) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - return ReplaceInstUsesWith(LI, GV->getInitializer()); - - // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - if (Constant *V = - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) - return ReplaceInstUsesWith(LI, V); - if (CE->getOperand(0)->isNullValue()) { - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - } else if (CE->isCast()) { - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - } - } - } - - // If this load comes from anywhere in a constant global, and if the global - // is all undef or zero, we know what it loads. 
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){ - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); - else if (isa<UndefValue>(GV->getInitializer())) - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } + // load null/undef -> unreachable + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. We do this instead of inserting an + // unreachable instruction directly because we cannot modify the CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index f6de362..223d2b9 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -138,7 +138,6 @@ namespace { void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks); bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); - unsigned getLoopUnswitchCost(Value *LIC); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock); void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); @@ -400,25 +399,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, return true; } -/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if -/// we choose to unswitch current loop on the specified value. -/// -unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) { - // If the condition is trivial, always unswitch. There is no code growth for - // this case. - if (IsTrivialUnswitchCondition(LIC)) - return 0; - - // FIXME: This is overly conservative because it does not take into - // consideration code simplification opportunities. - CodeMetrics Metrics; - for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) - Metrics.analyzeBasicBlock(*I); - return Metrics.NumInsts; -} - /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. @@ -427,24 +407,35 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for + // this case. + if (!IsTrivialUnswitchCondition(LoopCond)) { + // Check to see if it would be profitable to unswitch current loop. - // Check to see if it would be profitable to unswitch current loop. - unsigned Cost = getLoopUnswitchCost(LoopCond); - - // Do not do non-trivial unswitch while optimizing for size. 
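Stepping back to the visitLoadInst change above: a load of undef, or of null in address space 0, can never execute, but instcombine cannot modify the CFG from inside a visitor, so it plants a store to null before the load and replaces the load's value with undef. The guard condition in isolation (a sketch of the check, not the transform):

// Mirrors "isa<UndefValue>(Op) ||
//          (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)".
bool loadIsProvablyUnreachable(bool PtrIsUndef, bool PtrIsNull,
                               unsigned AddrSpace) {
  return PtrIsUndef || (PtrIsNull && AddrSpace == 0);
}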
- if (Cost && OptimizeForSize) - return false; - if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize)) - return false; + // Do not do non-trivial unswitch while optimizing for size. + if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + return false; - if (Cost > Threshold) { - // FIXME: this should estimate growth by the amount of code shared by the - // resultant unswitched loops. - // - DEBUG(errs() << "NOT unswitching loop %" - << currentLoop->getHeader()->getName() << ", cost too high: " - << currentLoop->getBlocks().size() << "\n"); - return false; + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities and code that can + // be shared by the resultant unswitched loops. + CodeMetrics Metrics; + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + + // Limit the number of instructions to avoid causing significant code + // expansion, and the number of basic blocks, to avoid loops with + // large numbers of branches which cause loop unswitching to go crazy. + // This is a very ad-hoc heuristic. + if (Metrics.NumInsts > Threshold || + Metrics.NumBlocks * 5 > Threshold) { + DEBUG(errs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"); + return false; + } } Constant *CondVal; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e6ffac2..af29f97 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -121,7 +121,6 @@ static bool isUnmovableInstruction(Instruction *I) { if (I->getOpcode() == Instruction::PHI || I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || - I->getOpcode() == Instruction::Malloc || I->getOpcode() == Instruction::Invoke || (I->getOpcode() == Instruction::Call && !isa<DbgInfoIntrinsic>(I)) || diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b5edf4e..b745097 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1229,7 +1229,10 @@ CallOverdefined: TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0)); if (TMRVI == TrackedMultipleRetVals.end()) goto CallOverdefined; - + + // Need to mark as overdefined, otherwise it stays undefined which + // creates extractvalue undef, <idx> + markOverdefined(I); // If we are tracking this callee, propagate the return values of the call // into this call site. We do this by walking all the uses. Single-index // ExtractValueInst uses can be tracked; anything more complicated is @@ -1271,7 +1274,6 @@ CallOverdefined: } } - void SCCPSolver::Solve() { // Process the work lists until they are empty! while (!BBWorkList.empty() || !InstWorkList.empty() || diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 4931ab3..35907fd 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -425,14 +425,26 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, if (L) { if (IsLoopEntry) { - if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). 
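The UnswitchIfProfitable rewrite above folds the old cost function into the caller: trivial conditions always unswitch, -Os blocks everything else, and otherwise growth is capped both by instruction count and by block count (weighted 5x, since every unswitch can double the loop body). As a standalone predicate (a sketch of those thresholds, not the pass itself):

// Mirrors "Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold".
bool profitableToUnswitch(bool TrivialCondition, bool OptimizeForSize,
                          unsigned NumInsts, unsigned NumBlocks,
                          unsigned Threshold) {
  if (TrivialCondition)
    return true;                     // no code growth for trivial conditions
  if (OptimizeForSize)
    return false;                    // never grow code under -Os
  return NumInsts <= Threshold &&    // cap raw size growth...
         NumBlocks * 5 <= Threshold; // ...and branch-heavy loops
}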
- while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - if (PredLoop) - PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } + // Add the new block to the nearest enclosing loop (and not an + // adjacent loop). To find this, examine each of the predecessors and + // determine which loops enclose them, and select the most-nested loop + // which contains the loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (unsigned i = 0; i != NumPreds; ++i) + if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { + // Seek a loop which actually contains the block being split (to + // avoid adjacent loops). + while (PredLoop && !PredLoop->contains(BB)) + PredLoop = PredLoop->getParentLoop(); + // Select the most-nested of these loops which contains the block. + if (PredLoop && + PredLoop->contains(BB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d00d69..619c939 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -444,18 +444,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. - Constant *StackSave, *StackRestore; - StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CG) { - // We know that StackSave/StackRestore are Function*'s, because they are - // intrinsics which must have the right types. 
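The SplitBlockPredecessors fix above is worth restating: the old code consulted only Preds[0], so a predecessor sitting in an adjacent (sibling) loop could attach NewBB to the wrong loop. The new code walks outward from each predecessor's loop until it reaches one that actually contains BB, then keeps the deepest such loop. The selection in miniature (hypothetical LoopNode type, not the LoopInfo API):

#include <set>
#include <vector>

// Minimal stand-in for a node in the loop nest.
struct LoopNode {
  const LoopNode *Parent = nullptr;
  unsigned Depth = 0;                      // 1 = outermost
  std::set<const void *> Blocks;           // blocks belonging to this loop
  bool contains(const void *BB) const { return Blocks.count(BB) != 0; }
};

// Pick the most-nested loop, across all predecessors, that encloses BB.
const LoopNode *
innermostEnclosingLoop(const std::vector<const LoopNode *> &PredLoops,
                       const void *BB) {
  const LoopNode *Innermost = nullptr;
  for (const LoopNode *L : PredLoops) {
    while (L && !L->contains(BB))          // skip adjacent (sibling) loops
      L = L->Parent;
    if (L && (!Innermost || Innermost->Depth < L->Depth))
      Innermost = L;                       // keep the deepest enclosing loop
  }
  return Innermost;
}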
-      StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave));
-      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
+      StackSaveCGN = CG->getOrInsertFunction(StackSave);
+      StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
       CallerNode = (*CG)[Caller];
     }
@@ -480,7 +477,8 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
     for (Function::iterator BB = FirstNewBlock, E = Caller->end();
          BB != E; ++BB)
       if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        CallInst::Create(StackRestore, SavedPtr, "", UI);
+        CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
+        if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
         ++NumStackRestores;
       }
   }
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 1fa51a3..7f11acf 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -33,11 +33,11 @@ namespace {
       for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
            AI != AE; ++AI)
         if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
-          AI->setName("tmp");
+          AI->setName("arg");
 
       for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
         if (!BB->hasName())
-          BB->setName("BB");
+          BB->setName("bb");
 
         for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
           if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index f26d7c1..9c9113d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -1,4 +1,4 @@
-//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//===- LowerAllocations.cpp - Reduce free insts to calls ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -29,18 +29,15 @@ using namespace llvm;
 STATISTIC(NumLowered, "Number of allocations lowered");
 
 namespace {
-  /// LowerAllocations - Turn malloc and free instructions into @malloc and
-  /// @free calls.
+  /// LowerAllocations - Turn free instructions into @free calls.
   ///
   class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
     Constant *FreeFunc;   // Functions in the module we are processing
                           // Initialized by doInitialization
-    bool LowerMallocArgToInteger;
   public:
     static char ID; // Pass ID, replacement for typeid
-    explicit LowerAllocations(bool LowerToInt = false)
-      : BasicBlockPass(&ID), FreeFunc(0),
-        LowerMallocArgToInteger(LowerToInt) {}
+    explicit LowerAllocations()
+      : BasicBlockPass(&ID), FreeFunc(0) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<TargetData>();
@@ -54,7 +51,7 @@ namespace {
     }
 
     /// doInitialization - For the lower allocations pass, this ensures that
-    /// a module contains a declaration for a malloc and a free function.
+    /// a module contains a declaration for a free function.
     ///
     bool doInitialization(Module &M);
 
@@ -76,13 +73,13 @@ X("lowerallocs", "Lower allocations from instructions to calls");
 
 // Publicly exposed interface to pass...
 const PassInfo *const llvm::LowerAllocationsID = &X;
 // createLowerAllocationsPass - Interface to this file...
-Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { - return new LowerAllocations(LowerMallocArgToInteger); +Pass *llvm::createLowerAllocationsPass() { + return new LowerAllocations(); } // doInitialization - For the lower allocations pass, this ensures that a -// module contains a declaration for a malloc and a free function. +// module contains a declaration for a free function. // // This function is always successful. // @@ -102,25 +99,9 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { BasicBlock::InstListType &BBIL = BB.getInstList(); - const TargetData &TD = getAnalysis<TargetData>(); - const Type *IntPtrTy = TD.getIntPtrType(BB.getContext()); - - // Loop over all of the instructions, looking for malloc or free instructions + // Loop over all of the instructions, looking for free instructions for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - if (MallocInst *MI = dyn_cast<MallocInst>(I)) { - Value *ArraySize = MI->getOperand(0); - if (ArraySize->getType() != IntPtrTy) - ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, - false /*ZExt*/, "", I); - Value *MCast = CallInst::CreateMalloc(I, IntPtrTy, - MI->getAllocatedType(), ArraySize); - - // Replace all uses of the old malloc inst with the cast inst - MI->replaceAllUsesWith(MCast); - I = --BBIL.erase(I); // remove and delete the malloc instr... - Changed = true; - ++NumLowered; - } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) { + if (FreeInst *FI = dyn_cast<FreeInst>(I)) { Value *PtrCast = new BitCastInst(FI->getOperand(0), Type::getInt8PtrTy(BB.getContext()), "", I); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 780ee26..8a07c35 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -48,7 +48,7 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - + if (IPI == 0) IPI = new IncomingPredInfoTy(); else @@ -104,12 +104,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // GetValueAtEndOfBlock to do our work. if (!getAvailableVals(AV).count(BB)) return GetValueAtEndOfBlock(BB); - + // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; Value *SingularValue = 0; - + // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. @@ -118,7 +118,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (i == 0) SingularValue = PredVal; @@ -131,7 +131,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; @@ -140,25 +140,25 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = 0; } } - + // If there are no predecessors, just return undef. if (PredValues.empty()) return UndefValue::get(PrototypeValue->getType()); - + // Otherwise, if all the merged values are the same, just use it. 
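GetValueInMiddleOfBlock above only inserts a PHI when the predecessors actually disagree: SingularValue holds the one incoming value seen so far and is reset the moment two predecessors differ. The same bookkeeping in miniature (a sketch with opaque value handles, not the LLVM types):

#include <utility>
#include <vector>

// Returns the lone incoming value, or 0 when a PHI is required.
// Pairs are (predecessor block, value live at the end of that block).
const void *singularIncomingValue(
    const std::vector<std::pair<const void *, const void *> > &PredValues) {
  const void *Singular = 0;
  for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
    if (i == 0)
      Singular = PredValues[i].second;  // first predecessor seeds the value
    else if (PredValues[i].second != Singular)
      return 0;                         // predecessors disagree: need a PHI
  }
  return Singular;                      // empty list: caller substitutes undef
}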
   if (SingularValue != 0)
     return SingularValue;
-  
+
   // Otherwise, we do need a PHI: insert one now.
   PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
                                          PrototypeValue->getName(),
                                          &BB->front());
   InsertedPHI->reserveOperandSpace(PredValues.size());
-  
+
   // Fill in all the predecessors of the PHI.
   for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
     InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
-  
+
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
   if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -168,7 +168,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
 
   // If the client wants to know about all new instructions, tell it.
   if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
-  
+
   DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
   return InsertedPHI;
 }
@@ -177,11 +177,14 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
 /// which use their value in the corresponding predecessor.
 void SSAUpdater::RewriteUse(Use &U) {
   Instruction *User = cast<Instruction>(U.getUser());
-  BasicBlock *UseBB = User->getParent();
-  if (PHINode *UserPN = dyn_cast<PHINode>(User))
-    UseBB = UserPN->getIncomingBlock(U);
 
-  U.set(GetValueInMiddleOfBlock(UseBB));
+  Value *V;
+  if (PHINode *UserPN = dyn_cast<PHINode>(User))
+    V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+  else
+    V = GetValueInMiddleOfBlock(User->getParent());
+
+  U.set(V);
 }
 
@@ -192,11 +195,11 @@ void SSAUpdater::RewriteUse(Use &U) {
 ///
 Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  
+
   // Query AvailableVals by doing an insertion of null.
   std::pair<AvailableValsTy::iterator, bool> InsertRes =
     AvailableVals.insert(std::make_pair(BB, WeakVH()));
-  
+
   // Handle the case when the insertion fails because we have already seen BB.
   if (!InsertRes.second) {
     // If the insertion failed, there are two cases.  The first case is that the
@@ -204,7 +207,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     // return the value.
     if (InsertRes.first->second != 0)
       return InsertRes.first->second;
-    
+
     // Otherwise, if the value we find is null, then the value is not
     // known but it is being computed elsewhere in our recursion.  This means
     // that we have a cycle.  Handle this by inserting a PHI node and returning
     // it.
       PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
                       &BB->front());
   }
-  
+
   // Okay, the value isn't in the map and we just inserted a null in the entry
   // to indicate that we're processing the block.  Since we have no idea what
   // value is in this block, we have to recurse through our predecessors.
@@ -225,13 +228,13 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   // of the recursion, just use IncomingPredInfo as an explicit stack.
   IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
   unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-  
+
   // As we're walking the predecessors, keep track of whether they are all
   // producing the same value.  If so, this value will capture it, if not, it
   // will get reset to null.  We distinguish the no-predecessor case explicitly
   // below.
   TrackingVH<Value> SingularValue;
-  
+
   // We can get our predecessor info by walking the pred_iterator list, but it
   // is relatively slow.  If we already have PHI nodes in this block, walk one
   // of them to get the predecessor list instead.
@@ -240,7 +243,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
       Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
       IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-      
+
       // Compute SingularValue.
       if (i == 0)
         SingularValue = PredVal;
@@ -253,7 +256,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       BasicBlock *PredBB = *PI;
       Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
       IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-      
+
       // Compute SingularValue.
       if (isFirstPred) {
         SingularValue = PredVal;
@@ -262,19 +265,19 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
         SingularValue = 0;
       }
     }
-  
+
   // If there are no predecessors, then we must have found an unreachable block;
   // just return 'undef'.  Since there are no predecessors, InsertRes must not
   // be invalidated.
   if (IncomingPredInfo.size() == FirstPredInfoEntry)
     return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
-  
+
   /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
   /// this block is involved in a loop, a no-entry PHI node will have been
   /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
   /// above.
   TrackingVH<Value> &InsertedVal = AvailableVals[BB];
-  
+
   // If all the predecessor values are the same then we don't need to insert a
   // PHI.  This is the simple and common case.
   if (SingularValue) {
@@ -291,31 +294,31 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     } else {
       InsertedVal = SingularValue;
     }
-    
+
     // Drop the entries we added in IncomingPredInfo to restore the stack.
     IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                            IncomingPredInfo.end());
     return InsertedVal;
   }
-  
+
   // Otherwise, we do need a PHI: insert one now if we don't already have one.
   if (InsertedVal == 0)
     InsertedVal = PHINode::Create(PrototypeValue->getType(),
                                   PrototypeValue->getName(), &BB->front());
-  
+
   PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
   InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
-  
+
   // Fill in all the predecessors of the PHI.
   for (IncomingPredInfoTy::iterator I =
          IncomingPredInfo.begin()+FirstPredInfoEntry,
        E = IncomingPredInfo.end(); I != E; ++I)
     InsertedPHI->addIncoming(I->second, I->first);
-  
+
   // Drop the entries we added in IncomingPredInfo to restore the stack.
   IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                          IncomingPredInfo.end());
-  
+
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
   if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -324,12 +327,10 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     InsertedVal = ConstVal;
   } else {
     DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
-    
+
     // If the client wants to know about all new instructions, tell it.
     if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
   }
-  
+
   return InsertedVal;
 }
-
-
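Finally, the RewriteUse change above makes the two lookup paths explicit: a use inside a PHI reads the value at the end of its incoming block, while any other use reads the value in the middle of its own block (the case that may insert a new PHI there). The dispatch in miniature (hypothetical types; the real SSAUpdater computes these values on demand rather than storing them):

#include <map>

struct Block { };
struct UseSite {
  const Block *UserBlock;     // block containing the using instruction
  const Block *IncomingBlock; // for PHI uses: the edge the value flows along
  bool UserIsPHI;
};

class ToyUpdater {
  std::map<const Block *, int> ValueAtEndOf;    // value live-out of a block
  std::map<const Block *, int> ValueInMiddleOf; // value available mid-block
public:
  // A PHI use takes the value at the end of the incoming block; every other
  // use takes the value in the middle of the block it lives in.
  int valueForUse(const UseSite &U) const {
    if (U.UserIsPHI)
      return ValueAtEndOf.at(U.IncomingBlock);
    return ValueInMiddleOf.at(U.UserBlock);
  }
};

The distinction matters because a PHI's operand is logically read on the incoming edge, not in the PHI's own block, so asking for the mid-block value there would merge the wrong definitions.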