Diffstat (limited to 'lib/Transforms')

 lib/Transforms/Hello/Hello.cpp                 |   10
 lib/Transforms/IPO/CMakeLists.txt              |    1
 lib/Transforms/IPO/FunctionAttrs.cpp           |    5
 lib/Transforms/IPO/GlobalDCE.cpp               |    2
 lib/Transforms/IPO/GlobalOpt.cpp               |  505
 lib/Transforms/IPO/RaiseAllocations.cpp        |  118
 lib/Transforms/IPO/StripSymbols.cpp            |    2
 lib/Transforms/Scalar/CodeGenPrepare.cpp       |   42
 lib/Transforms/Scalar/GVN.cpp                  |  481
 lib/Transforms/Scalar/InstructionCombining.cpp |  218
 lib/Transforms/Scalar/LoopUnswitch.cpp         |   63
 lib/Transforms/Scalar/Reassociate.cpp          |    1
 lib/Transforms/Scalar/SCCP.cpp                 |    6
 lib/Transforms/Utils/BasicBlockUtils.cpp       |   28
 lib/Transforms/Utils/InlineFunction.cpp        |   14
 lib/Transforms/Utils/InstructionNamer.cpp      |    4
 lib/Transforms/Utils/LowerAllocations.cpp      |   39
 lib/Transforms/Utils/SSAUpdater.cpp            |   73
 18 files changed, 538 insertions(+), 1074 deletions(-)
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 8000d0d..91534a7 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -30,9 +30,8 @@ namespace {
virtual bool runOnFunction(Function &F) {
HelloCounter++;
- std::string fname = F.getName();
- EscapeString(fname);
- errs() << "Hello: " << fname << "\n";
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
return false;
}
};
@@ -49,9 +48,8 @@ namespace {
virtual bool runOnFunction(Function &F) {
HelloCounter++;
- std::string fname = F.getName();
- EscapeString(fname);
- errs() << "Hello: " << fname << "\n";
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
return false;
}
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index ec0f1e1..5c28801 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -9,7 +9,6 @@ add_llvm_library(LLVMipo
GlobalOpt.cpp
IPConstantPropagation.cpp
IPO.cpp
- IndMemRemoval.cpp
InlineAlways.cpp
InlineSimple.cpp
Inliner.cpp
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 7edaa7f..0701b94 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -153,7 +153,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
// Writes memory. Just give up.
return false;
- if (isa<MallocInst>(I))
+ if (isMalloc(I))
// malloc claims not to write memory! PR3754.
return false;
@@ -267,11 +267,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
// Check whether the pointer came from an allocation.
case Instruction::Alloca:
- case Instruction::Malloc:
break;
case Instruction::Call:
- if (isMalloc(RVI))
- break;
case Instruction::Invoke: {
CallSite CS(RVI);
if (CS.paramHasAttr(0, Attribute::NoAlias))
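
With MallocInst gone, malloc detection happens by inspecting call sites. A hedged sketch of the check this file now relies on (the MallocHelpers.h header name is an assumption for this era of the tree, later merged into MemoryBuiltins.h; mayWriteMemoryConservative is hypothetical):

#include "llvm/Analysis/MallocHelpers.h"
#include "llvm/Instruction.h"
using namespace llvm;

static bool mayWriteMemoryConservative(const Instruction *I) {
  // malloc's declared attributes claim it does not write memory, which is
  // unsound for attribute inference (PR3754), so treat it as a write.
  if (isMalloc(I))
    return true;
  return I->mayWriteToMemory();
}
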
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 09f9e7c..8f4e8b3 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -149,8 +149,6 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
- // Remove dead metadata.
- Changed |= M.getContext().RemoveDeadMetadata();
return Changed;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a44386e..9ced2e8 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -822,140 +822,18 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- MallocInst *MI,
- LLVMContext &Context) {
- DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI);
- ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());
-
- if (NElements->getZExtValue() != 1) {
- // If we have an array allocation, transform it to a single element
- // allocation to make the code below simpler.
- Type *NewTy = ArrayType::get(MI->getAllocatedType(),
- NElements->getZExtValue());
- MallocInst *NewMI =
- new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)),
- MI->getAlignment(), MI->getName(), MI);
- Value* Indices[2];
- Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context));
- Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
- NewMI->getName()+".el0", MI);
- MI->replaceAllUsesWith(NewGEP);
- MI->eraseFromParent();
- MI = NewMI;
- }
-
- // Create the new global variable. The contents of the malloc'd memory is
- // undefined, so initialize with an undef value.
- // FIXME: This new global should have the alignment returned by malloc. Code
- // could depend on malloc returning large alignment (on the mac, 16 bytes) but
- // this would only guarantee some lower alignment.
- Constant *Init = UndefValue::get(MI->getAllocatedType());
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- MI->getAllocatedType(), false,
- GlobalValue::InternalLinkage, Init,
- GV->getName()+".body",
- GV,
- GV->isThreadLocal());
-
- // Anything that used the malloc now uses the global directly.
- MI->replaceAllUsesWith(NewGV);
-
- Constant *RepValue = NewGV;
- if (NewGV->getType() != GV->getType()->getElementType())
- RepValue = ConstantExpr::getBitCast(RepValue,
- GV->getType()->getElementType());
-
- // If there is a comparison against null, we will insert a global bool to
- // keep track of whether the global was initialized yet or not.
- GlobalVariable *InitBool =
- new GlobalVariable(Context, Type::getInt1Ty(Context), false,
- GlobalValue::InternalLinkage,
- ConstantInt::getFalse(Context), GV->getName()+".init",
- GV->isThreadLocal());
- bool InitBoolUsed = false;
-
- // Loop over all uses of GV, processing them in turn.
- std::vector<StoreInst*> Stores;
- while (!GV->use_empty())
- if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
- while (!LI->use_empty()) {
- Use &LoadUse = LI->use_begin().getUse();
- if (!isa<ICmpInst>(LoadUse.getUser()))
- LoadUse = RepValue;
- else {
- ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser());
- // Replace the cmp X, 0 with a use of the bool value.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);
- InitBoolUsed = true;
- switch (CI->getPredicate()) {
- default: llvm_unreachable("Unknown ICmp Predicate!");
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- LV = ConstantInt::getFalse(Context); // X < null -> always false
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_EQ:
- LV = BinaryOperator::CreateNot(LV, "notinit", CI);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- break; // no change.
- }
- CI->replaceAllUsesWith(LV);
- CI->eraseFromParent();
- }
- }
- LI->eraseFromParent();
- } else {
- StoreInst *SI = cast<StoreInst>(GV->use_back());
- // The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
- SI->eraseFromParent();
- }
-
- // If the initialization boolean was used, insert it, otherwise delete it.
- if (!InitBoolUsed) {
- while (!InitBool->use_empty()) // Delete initializations
- cast<Instruction>(InitBool->use_back())->eraseFromParent();
- delete InitBool;
- } else
- GV->getParent()->getGlobalList().insert(GV, InitBool);
-
-
- // Now the GV is dead, nuke it and the malloc.
- GV->eraseFromParent();
- MI->eraseFromParent();
-
- // To further other optimizations, loop over all users of NewGV and try to
- // constant prop them. This will promote GEP instructions with constant
- // indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, Context);
- if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, Context);
-
- return NewGV;
-}
-
-/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
-/// variable, and transforms the program as if it always contained the result of
-/// the specified malloc. Because it is always the result of the specified
-/// malloc, there is no reason to actually DO the malloc. Instead, turn the
-/// malloc into a global, and any loads of GV as uses of the new global.
-static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
BitCastInst *BCI,
LLVMContext &Context,
TargetData* TD) {
+ DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
+ << " CALL = " << *CI << " BCI = " << *BCI << '\n');
+
const Type *IntPtrTy = TD->getIntPtrType(Context);
- DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI);
-
- ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI,
- Context, TD));
+ Value* ArraySize = getMallocArraySize(CI, Context, TD);
+ assert(ArraySize && "not a malloc whose array size can be determined");
+ ConstantInt *NElements = cast<ConstantInt>(ArraySize);
if (NElements->getZExtValue() != 1) {
// If we have an array allocation, transform it to a single element
// allocation to make the code below simpler.
@@ -976,9 +854,6 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- // FIXME: This new global should have the alignment returned by malloc. Code
- // could depend on malloc returning large alignment (on the mac, 16 bytes) but
- // this would only guarantee some lower alignment.
const Type *MAT = getMallocAllocatedType(CI);
Constant *Init = UndefValue::get(MAT);
GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
@@ -1398,185 +1273,6 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
}
}
-/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break
-/// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI,
- LLVMContext &Context){
- DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI);
- const StructType *STy = cast<StructType>(MI->getAllocatedType());
-
- // There is guaranteed to be at least one use of the malloc (storing
- // it into GV). If there are other uses, change them to be uses of
- // the global to simplify later code. This also deletes the store
- // into GV.
- ReplaceUsesOfMallocWithGlobal(MI, GV);
-
- // Okay, at this point, there are no users of the malloc. Insert N
- // new mallocs at the same place as MI, and N globals.
- std::vector<Value*> FieldGlobals;
- std::vector<MallocInst*> FieldMallocs;
-
- for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
- const Type *FieldTy = STy->getElementType(FieldNo);
- const Type *PFieldTy = PointerType::getUnqual(FieldTy);
-
- GlobalVariable *NGV =
- new GlobalVariable(*GV->getParent(),
- PFieldTy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(PFieldTy),
- GV->getName() + ".f" + Twine(FieldNo), GV,
- GV->isThreadLocal());
- FieldGlobals.push_back(NGV);
-
- MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
- MI->getName() + ".f" + Twine(FieldNo), MI);
- FieldMallocs.push_back(NMI);
- new StoreInst(NMI, NGV, MI);
- }
-
- // The tricky aspect of this transformation is handling the case when malloc
- // fails. In the original code, malloc failing would set the result pointer
- // of malloc to null. In this case, some mallocs could succeed and others
- // could fail. As such, we emit code that looks like this:
- // F0 = malloc(field0)
- // F1 = malloc(field1)
- // F2 = malloc(field2)
- // if (F0 == 0 || F1 == 0 || F2 == 0) {
- // if (F0) { free(F0); F0 = 0; }
- // if (F1) { free(F1); F1 = 0; }
- // if (F2) { free(F2); F2 = 0; }
- // }
- Value *RunningOr = 0;
- for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
- Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i],
- Constant::getNullValue(FieldMallocs[i]->getType()),
- "isnull");
- if (!RunningOr)
- RunningOr = Cond; // First seteq
- else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
- }
-
- // Split the basic block at the old malloc.
- BasicBlock *OrigBB = MI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
-
- // Create the block to check the first condition. Put all these blocks at the
- // end of the function as they are unlikely to be executed.
- BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
- OrigBB->getParent());
-
- // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
- // branch on RunningOr.
- OrigBB->getTerminator()->eraseFromParent();
- BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-
- // Within the NullPtrBlock, we need to emit a comparison and branch for each
- // pointer, because some may be null while others are not.
- for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
- Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
- Constant::getNullValue(GVVal->getType()),
- "tmp");
- BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
- OrigBB->getParent());
- BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
- OrigBB->getParent());
- BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
-
- // Fill in FreeBlock.
- new FreeInst(GVVal, FreeBlock);
- new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
- FreeBlock);
- BranchInst::Create(NextBlock, FreeBlock);
-
- NullPtrBlock = NextBlock;
- }
-
- BranchInst::Create(ContBB, NullPtrBlock);
-
- // MI is no longer needed, remove it.
- MI->eraseFromParent();
-
- /// InsertedScalarizedLoads - As we process loads, if we can't immediately
- /// update all uses of the load, keep track of what scalarized loads are
- /// inserted for a given load.
- DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
- InsertedScalarizedValues[GV] = FieldGlobals;
-
- std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-
- // Okay, the malloc site is completely handled. All of the uses of GV are now
- // loads, and all uses of those loads are simple. Rewrite them to use loads
- // of the per-field globals instead.
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
- Context);
- continue;
- }
-
- // Must be a store of null.
- StoreInst *SI = cast<StoreInst>(User);
- assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
- "Unexpected heap-sra user!");
-
- // Insert a store of null into each global.
- for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
- Constant *Null = Constant::getNullValue(PT->getElementType());
- new StoreInst(Null, FieldGlobals[i], SI);
- }
- // Erase the original store.
- SI->eraseFromParent();
- }
-
- // While we have PHIs that are interesting to rewrite, do it.
- while (!PHIsToRewrite.empty()) {
- PHINode *PN = PHIsToRewrite.back().first;
- unsigned FieldNo = PHIsToRewrite.back().second;
- PHIsToRewrite.pop_back();
- PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
- assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
-
- // Add all the incoming values. This can materialize more phis.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *InVal = PN->getIncomingValue(i);
- InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
- PHIsToRewrite, Context);
- FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
- }
- }
-
- // Drop all inter-phi links and any loads that made it this far.
- for (DenseMap<Value*, std::vector<Value*> >::iterator
- I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
- I != E; ++I) {
- if (PHINode *PN = dyn_cast<PHINode>(I->first))
- PN->dropAllReferences();
- else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
- LI->dropAllReferences();
- }
-
- // Delete all the phis and loads now that inter-references are dead.
- for (DenseMap<Value*, std::vector<Value*> >::iterator
- I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
- I != E; ++I) {
- if (PHINode *PN = dyn_cast<PHINode>(I->first))
- PN->eraseFromParent();
- else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
- LI->eraseFromParent();
- }
-
- // The old global is now dead, remove it.
- GV->eraseFromParent();
-
- ++NumHeapSRA;
- return cast<GlobalVariable>(FieldGlobals[0]);
-}
-
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
@@ -1587,6 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
<< " BITCAST = " << *BCI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
+ Value* ArraySize = getMallocArraySize(CI, Context, TD);
+ assert(ArraySize && "not a malloc whose array size can be determined");
// There is guaranteed to be at least one use of the malloc (storing
// it into GV). If there are other uses, change them to be uses of
@@ -1611,8 +1309,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
- Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy,
- getMallocArraySize(CI, Context, TD),
+ Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+ FieldTy, ArraySize,
BCI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, BCI);
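
CallInst::CreateMalloc is the replacement for constructing a MallocInst. A minimal sketch of the call as this patch uses it (argument order mirrored from the hunk above; emitArrayMalloc is hypothetical, and the bitcast-to-element-type behavior is an assumption based on how the result is stored here):

#include "llvm/Instructions.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

static Value *emitArrayMalloc(Instruction *InsertBefore, const Type *EltTy,
                              Value *NElts, const TargetData *TD,
                              LLVMContext &Ctx) {
  // Builds the size computation, the call to malloc, and (when EltTy is not
  // i8) a bitcast of the raw i8* result to EltTy*.
  return CallInst::CreateMalloc(InsertBefore, TD->getIntPtrType(Ctx),
                                EltTy, NElts, "arr");
}
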
@@ -1766,95 +1464,6 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
/// pointer global variable with a single value stored it that is a malloc or
/// cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
- MallocInst *MI,
- Module::global_iterator &GVI,
- TargetData *TD,
- LLVMContext &Context) {
- // If this is a malloc of an abstract type, don't touch it.
- if (!MI->getAllocatedType()->isSized())
- return false;
-
- // We can't optimize this global unless all uses of it are *known* to be
- // of the malloc value, not of the null initializer value (consider a use
- // that compares the global's value against zero to see if the malloc has
- // been reached). To do this, we check to see if all uses of the global
- // would trap if the global were null: this proves that they must all
- // happen after the malloc.
- if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
- return false;
-
- // We can't optimize this if the malloc itself is used in a complex way,
- // for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded setcc'd, and
- // GEP'd. These are all things we could transform to using the global
- // for.
- {
- SmallPtrSet<PHINode*, 8> PHIs;
- if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
- return false;
- }
-
-
- // If we have a global that is only initialized with a fixed size malloc,
- // transform the program to use global memory instead of malloc'd memory.
- // This eliminates dynamic allocation, avoids an indirection accessing the
- // data, and exposes the resultant global to further GlobalOpt.
- if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
- // Restrict this transformation to only working on small allocations
- // (2048 bytes currently), as we don't want to introduce a 16M global or
- // something.
- if (TD &&
- NElements->getZExtValue()*
- TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context);
- return true;
- }
- }
-
- // If the allocation is an array of structures, consider transforming this
- // into multiple malloc'd arrays, one for each field. This is basically
- // SRoA for malloc'd memory.
- const Type *AllocTy = MI->getAllocatedType();
-
- // If this is an allocation of a fixed size array of structs, analyze as a
- // variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (!MI->isArrayAllocation())
- if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
- AllocTy = AT->getElementType();
-
- if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
- // This the structure has an unreasonable number of fields, leave it
- // alone.
- if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
- AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
-
- // If this is a fixed size array, transform the Malloc to be an alloc of
- // structs. malloc [100 x struct],1 -> malloc struct, 100
- if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
- MallocInst *NewMI =
- new MallocInst(AllocSTy,
- ConstantInt::get(Type::getInt32Ty(Context),
- AT->getNumElements()),
- "", MI);
- NewMI->takeName(MI);
- Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
- MI->replaceAllUsesWith(Cast);
- MI->eraseFromParent();
- MI = NewMI;
- }
-
- GVI = PerformHeapAllocSRoA(GV, MI, Context);
- return true;
- }
- }
-
- return false;
-}
-
-/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
-/// pointer global variable with a single value stored it that is a malloc or
-/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
BitCastInst *BCI,
Module::global_iterator &GVI,
@@ -1892,52 +1501,55 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// transform the program to use global memory instead of malloc'd memory.
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
- if (ConstantInt *NElements =
- dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) {
- // Restrict this transformation to only working on small allocations
- // (2048 bytes currently), as we don't want to introduce a 16M global or
- // something.
- if (TD &&
- NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
- return true;
- }
- }
-
- // If the allocation is an array of structures, consider transforming this
- // into multiple malloc'd arrays, one for each field. This is basically
- // SRoA for malloc'd memory.
-
- // If this is an allocation of a fixed size array of structs, analyze as a
- // variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (!isArrayMalloc(CI, Context, TD))
- if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
- AllocTy = AT->getElementType();
-
- if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
- // This the structure has an unreasonable number of fields, leave it
- // alone.
- if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
- AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
-
- // If this is a fixed size array, transform the Malloc to be an alloc of
- // structs. malloc [100 x struct],1 -> malloc struct, 100
- if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
- AT->getNumElements());
- Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
- AllocSTy, NumElements,
- BCI->getName());
- Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
- BCI->replaceAllUsesWith(Cast);
- BCI->eraseFromParent();
- CI->eraseFromParent();
- BCI = cast<BitCastInst>(NewMI);
- CI = extractMallocCallFromBitCast(NewMI);
+ Value *NElems = getMallocArraySize(CI, Context, TD);
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ if (NElems) {
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (TD &&
+ NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
+ return true;
}
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (!isArrayMalloc(CI, Context, TD))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+      // If the structure has an unreasonable number of fields, leave it
+      // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+
+ // If this is a fixed size array, transform the Malloc to be an alloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT =
+ dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
+ AT->getNumElements());
+ Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+ AllocSTy, NumElements,
+ BCI->getName());
+ Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
+ BCI->replaceAllUsesWith(Cast);
+ BCI->eraseFromParent();
+ CI->eraseFromParent();
+ BCI = cast<BitCastInst>(NewMI);
+ CI = extractMallocCallFromBitCast(NewMI);
+ }
- GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
- return true;
+ GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
+ return true;
+ }
}
}
@@ -1966,9 +1578,6 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
return true;
- } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
- if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))
- return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
if (getMallocAllocatedType(CI)) {
BitCastInst* BCI = NULL;
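
At the source level, OptimizeGlobalAddressOfMalloc trades a once-stored heap allocation for static storage plus, when needed, an initialization flag. A hedged C-style analogy of the rewrite (all names hypothetical; the real transform works on LLVM IR):

#include <cstdlib>
struct S { int x; };

// Before: a global that only ever holds the result of one small malloc.
S *GV;
void initBefore() { GV = (S *)malloc(sizeof(S)); }
bool readyBefore() { return GV != 0; }

// After (conceptually): the allocation becomes a ".body" global and the
// null comparison becomes a load of a ".init" flag set at the store point.
static S GV_body;     // GV.body: contents start as undef
static bool GV_init;  // GV.init: starts false
void initAfter() { GV_init = true; /* uses of GV now resolve to &GV_body */ }
bool readyAfter() { return GV_init; }
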
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index 4c1f26d..deb4405 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -1,4 +1,4 @@
-//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===//
+//===- RaiseAllocations.cpp - Convert @free calls to insts ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the RaiseAllocations pass which convert malloc and free
-// calls to malloc and free instructions.
+// This file defines the RaiseAllocations pass, which converts free calls to
+// free instructions.
//
//===----------------------------------------------------------------------===//
@@ -29,19 +29,19 @@ using namespace llvm;
STATISTIC(NumRaised, "Number of allocations raised");
namespace {
- // RaiseAllocations - Turn @malloc and @free calls into the appropriate
+ // RaiseAllocations - Turn @free calls into the appropriate
// instruction.
//
class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass {
- Function *MallocFunc; // Functions in the module we are processing
- Function *FreeFunc; // Initialized by doPassInitializationVirt
+    Function *FreeFunc;   // Free function in the module we are processing.
+                          // Initialized by doInitialization.
public:
static char ID; // Pass identification, replacement for typeid
RaiseAllocations()
- : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {}
+ : ModulePass(&ID), FreeFunc(0) {}
// doPassInitialization - For the raise allocations pass, this finds a
- // declaration for malloc and free if they exist.
+ // declaration for free if it exists.
//
void doInitialization(Module &M);
@@ -61,50 +61,16 @@ ModulePass *llvm::createRaiseAllocationsPass() {
}
-// If the module has a symbol table, they might be referring to the malloc and
-// free functions. If this is the case, grab the method pointers that the
-// module is using.
+// If the module has a symbol table, it might be referring to the free
+// function. If this is the case, grab the function pointer that the module is
+// using.
//
-// Lookup @malloc and @free in the symbol table, for later use. If they don't
+// Lookup @free in the symbol table, for later use. If they don't
// exist, or are not external, we do not worry about converting calls to that
// function into the appropriate instruction.
//
void RaiseAllocations::doInitialization(Module &M) {
- // Get Malloc and free prototypes if they exist!
- MallocFunc = M.getFunction("malloc");
- if (MallocFunc) {
- const FunctionType* TyWeHave = MallocFunc->getFunctionType();
-
- // Get the expected prototype for malloc
- const FunctionType *Malloc1Type =
- FunctionType::get(Type::getInt8PtrTy(M.getContext()),
- std::vector<const Type*>(1,
- Type::getInt64Ty(M.getContext())), false);
-
- // Chck to see if we got the expected malloc
- if (TyWeHave != Malloc1Type) {
- // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
- // This handles the common declaration of: 'void *malloc(unsigned);'
- const FunctionType *Malloc2Type =
- FunctionType::get(PointerType::getUnqual(
- Type::getInt8Ty(M.getContext())),
- std::vector<const Type*>(1,
- Type::getInt32Ty(M.getContext())), false);
- if (TyWeHave != Malloc2Type) {
- // Check to see if the prototype is missing, giving us
- // i8*(...) * malloc
- // This handles the common declaration of: 'void *malloc();'
- const FunctionType *Malloc3Type =
- FunctionType::get(PointerType::getUnqual(
- Type::getInt8Ty(M.getContext())),
- true);
- if (TyWeHave != Malloc3Type)
- // Give up
- MallocFunc = 0;
- }
- }
- }
-
+ // Get free prototype if it exists!
FreeFunc = M.getFunction("free");
if (FreeFunc) {
const FunctionType* TyWeHave = FreeFunc->getFunctionType();
@@ -138,72 +104,18 @@ void RaiseAllocations::doInitialization(Module &M) {
}
// Don't mess with locally defined versions of these functions...
- if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0;
if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0;
}
// run - Transform calls into instructions...
//
bool RaiseAllocations::runOnModule(Module &M) {
- // Find the malloc/free prototypes...
+ // Find the free prototype...
doInitialization(M);
bool Changed = false;
- // First, process all of the malloc calls...
- if (MallocFunc) {
- std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end());
- std::vector<Value*> EqPointers; // Values equal to MallocFunc
- while (!Users.empty()) {
- User *U = Users.back();
- Users.pop_back();
-
- if (Instruction *I = dyn_cast<Instruction>(U)) {
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && !CS.arg_empty() &&
- (CS.getCalledFunction() == MallocFunc ||
- std::find(EqPointers.begin(), EqPointers.end(),
- CS.getCalledValue()) != EqPointers.end())) {
-
- Value *Source = *CS.arg_begin();
-
- // If no prototype was provided for malloc, we may need to cast the
- // source size.
- if (Source->getType() != Type::getInt32Ty(M.getContext()))
- Source =
- CastInst::CreateIntegerCast(Source,
- Type::getInt32Ty(M.getContext()),
- false/*ZExt*/,
- "MallocAmtCast", I);
-
- MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()),
- Source, "", I);
- MI->takeName(I);
- I->replaceAllUsesWith(MI);
-
- // If the old instruction was an invoke, add an unconditional branch
- // before the invoke, which will become the new terminator.
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
- BranchInst::Create(II->getNormalDest(), I);
-
- // Delete the old call site
- I->eraseFromParent();
- Changed = true;
- ++NumRaised;
- }
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) {
- Users.insert(Users.end(), GV->use_begin(), GV->use_end());
- EqPointers.push_back(GV);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- if (CE->isCast()) {
- Users.insert(Users.end(), CE->use_begin(), CE->use_end());
- EqPointers.push_back(CE);
- }
- }
- }
- }
-
- // Next, process all free calls...
+ // Process all free calls...
if (FreeFunc) {
std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end());
std::vector<Value*> EqPointers; // Values equal to FreeFunc
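
After this change the pass only raises @free. A hedged sketch of the per-call rewrite it performs (raiseFreeCall is hypothetical; FreeInst was still a first-class instruction at this point, call operand 0 is the callee in this era's IR, and the invoke case handled by the real pass is omitted):

#include "llvm/Instructions.h"
using namespace llvm;

static void raiseFreeCall(CallInst *Call) {
  Value *Ptr = Call->getOperand(1); // first actual argument
  new FreeInst(Ptr, Call);          // insert the free instruction before it
  Call->eraseFromParent();          // then drop the original call site
}
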
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 77d44b2..57aaf43 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -250,8 +250,6 @@ static bool StripDebugInfo(Module &M) {
if (NMD)
NMD->eraseFromParent();
- // Remove dead metadata.
- M.getContext().RemoveDeadMetadata();
return true;
}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a3e3fea..42209b8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -73,6 +73,7 @@ namespace {
DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs);
+ bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
void findLoopBackEdges(const Function &F);
};
@@ -731,6 +732,43 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
return MadeChange;
}
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+ // Look for a load being extended.
+ LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI) return false;
+
+ // If they're already in the same block, there's nothing to do.
+ if (LI->getParent() == I->getParent())
+ return false;
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() &&
+ TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+ return false;
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ return false;
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ I->removeFromParent();
+ I->insertAfter(LI);
+ return true;
+}
+
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
@@ -846,8 +884,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
MadeChange |= Change;
}
- if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
+ if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
+ MadeChange |= MoveExtToFormExtLoad(I);
MadeChange |= OptimizeExtUses(I);
+ }
} else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
MadeChange |= OptimizeCmpExpression(CI);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
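
The payoff of MoveExtToFormExtLoad is easiest to see at the source level: SelectionDAG selects one basic block at a time, so a load and its extend in different blocks cannot be folded together. A hedged illustration (codegen outcome is an assumption about typical targets):

// Before this change, the sext of V could be emitted in the return block,
// out of SelectionDAG's per-block view of the load. MoveExtToFormExtLoad
// hoists the extend next to the load so the pair folds into a single
// sign-extending load (e.g. a movsbl-style instruction on x86).
int widenByte(const signed char *P, bool Slow) {
  signed char V = *P;
  if (Slow) {
    // cold path; does not use V
  }
  return V; // implicit sext i8 -> i32
}
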
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 2ed4a63..8859324 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -78,13 +78,10 @@ namespace {
SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
- EMPTY, TOMBSTONE };
+ INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
ExpressionOpcode opcode;
const Type* type;
- uint32_t firstVN;
- uint32_t secondVN;
- uint32_t thirdVN;
SmallVector<uint32_t, 4> varargs;
Value *function;
@@ -100,12 +97,6 @@ namespace {
return false;
else if (function != other.function)
return false;
- else if (firstVN != other.firstVN)
- return false;
- else if (secondVN != other.secondVN)
- return false;
- else if (thirdVN != other.thirdVN)
- return false;
else {
if (varargs.size() != other.varargs.size())
return false;
@@ -146,6 +137,10 @@ namespace {
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
Expression create_expression(Constant* C);
+ Expression create_expression(ExtractValueInst* C);
+ Expression create_expression(InsertValueInst* C);
+
+ uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value *V);
@@ -176,13 +171,8 @@ template <> struct DenseMapInfo<Expression> {
static unsigned getHashValue(const Expression e) {
unsigned hash = e.opcode;
- hash = e.firstVN + hash * 37;
- hash = e.secondVN + hash * 37;
- hash = e.thirdVN + hash * 37;
-
hash = ((unsigned)((uintptr_t)e.type >> 4) ^
- (unsigned)((uintptr_t)e.type >> 9)) +
- hash * 37;
+ (unsigned)((uintptr_t)e.type >> 9));
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
@@ -290,9 +280,6 @@ Expression ValueTable::create_expression(CallInst* C) {
Expression e;
e.type = C->getType();
- e.firstVN = 0;
- e.secondVN = 0;
- e.thirdVN = 0;
e.function = C->getCalledFunction();
e.opcode = Expression::CALL;
@@ -305,10 +292,8 @@ Expression ValueTable::create_expression(CallInst* C) {
Expression ValueTable::create_expression(BinaryOperator* BO) {
Expression e;
-
- e.firstVN = lookup_or_add(BO->getOperand(0));
- e.secondVN = lookup_or_add(BO->getOperand(1));
- e.thirdVN = 0;
+ e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
e.function = 0;
e.type = BO->getType();
e.opcode = getOpcode(BO);
@@ -319,9 +304,8 @@ Expression ValueTable::create_expression(BinaryOperator* BO) {
Expression ValueTable::create_expression(CmpInst* C) {
Expression e;
- e.firstVN = lookup_or_add(C->getOperand(0));
- e.secondVN = lookup_or_add(C->getOperand(1));
- e.thirdVN = 0;
+ e.varargs.push_back(lookup_or_add(C->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(C->getOperand(1)));
e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
@@ -332,9 +316,7 @@ Expression ValueTable::create_expression(CmpInst* C) {
Expression ValueTable::create_expression(CastInst* C) {
Expression e;
- e.firstVN = lookup_or_add(C->getOperand(0));
- e.secondVN = 0;
- e.thirdVN = 0;
+ e.varargs.push_back(lookup_or_add(C->getOperand(0)));
e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
@@ -345,9 +327,9 @@ Expression ValueTable::create_expression(CastInst* C) {
Expression ValueTable::create_expression(ShuffleVectorInst* S) {
Expression e;
- e.firstVN = lookup_or_add(S->getOperand(0));
- e.secondVN = lookup_or_add(S->getOperand(1));
- e.thirdVN = lookup_or_add(S->getOperand(2));
+ e.varargs.push_back(lookup_or_add(S->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(S->getOperand(1)));
+ e.varargs.push_back(lookup_or_add(S->getOperand(2)));
e.function = 0;
e.type = S->getType();
e.opcode = Expression::SHUFFLE;
@@ -358,9 +340,8 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) {
Expression ValueTable::create_expression(ExtractElementInst* E) {
Expression e;
- e.firstVN = lookup_or_add(E->getOperand(0));
- e.secondVN = lookup_or_add(E->getOperand(1));
- e.thirdVN = 0;
+ e.varargs.push_back(lookup_or_add(E->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(E->getOperand(1)));
e.function = 0;
e.type = E->getType();
e.opcode = Expression::EXTRACT;
@@ -371,9 +352,9 @@ Expression ValueTable::create_expression(ExtractElementInst* E) {
Expression ValueTable::create_expression(InsertElementInst* I) {
Expression e;
- e.firstVN = lookup_or_add(I->getOperand(0));
- e.secondVN = lookup_or_add(I->getOperand(1));
- e.thirdVN = lookup_or_add(I->getOperand(2));
+ e.varargs.push_back(lookup_or_add(I->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getOperand(1)));
+ e.varargs.push_back(lookup_or_add(I->getOperand(2)));
e.function = 0;
e.type = I->getType();
e.opcode = Expression::INSERT;
@@ -384,9 +365,9 @@ Expression ValueTable::create_expression(InsertElementInst* I) {
Expression ValueTable::create_expression(SelectInst* I) {
Expression e;
- e.firstVN = lookup_or_add(I->getCondition());
- e.secondVN = lookup_or_add(I->getTrueValue());
- e.thirdVN = lookup_or_add(I->getFalseValue());
+ e.varargs.push_back(lookup_or_add(I->getCondition()));
+ e.varargs.push_back(lookup_or_add(I->getTrueValue()));
+ e.varargs.push_back(lookup_or_add(I->getFalseValue()));
e.function = 0;
e.type = I->getType();
e.opcode = Expression::SELECT;
@@ -397,9 +378,7 @@ Expression ValueTable::create_expression(SelectInst* I) {
Expression ValueTable::create_expression(GetElementPtrInst* G) {
Expression e;
- e.firstVN = lookup_or_add(G->getPointerOperand());
- e.secondVN = 0;
- e.thirdVN = 0;
+ e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
e.function = 0;
e.type = G->getType();
e.opcode = Expression::GEP;
@@ -411,6 +390,35 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) {
return e;
}
+Expression ValueTable::create_expression(ExtractValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACTVALUE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
+ e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::INSERTVALUE;
+
+ return e;
+}
+
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -420,232 +428,197 @@ void ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
-/// lookup_or_add - Returns the value number for the specified value, assigning
-/// it a new number if it did not have one before.
-uint32_t ValueTable::lookup_or_add(Value *V) {
- DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
- if (VI != valueNumbering.end())
- return VI->second;
-
- if (CallInst* C = dyn_cast<CallInst>(V)) {
- if (AA->doesNotAccessMemory(C)) {
- Expression e = create_expression(C);
-
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
- return nextValueNumber++;
- }
- } else if (AA->onlyReadsMemory(C)) {
- Expression e = create_expression(C);
-
- if (expressionNumbering.find(e) == expressionNumbering.end()) {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
- }
-
- MemDepResult local_dep = MD->getDependency(C);
-
- if (!local_dep.isDef() && !local_dep.isNonLocal()) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
- }
-
- if (local_dep.isDef()) {
- CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
-
- if (local_cdep->getNumOperands() != C->getNumOperands()) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
- }
-
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
- if (c_vn != cd_vn) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
- }
- }
-
- uint32_t v = lookup_or_add(local_cdep);
- valueNumbering.insert(std::make_pair(V, v));
- return v;
- }
-
- // Non-local case.
- const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
- MD->getNonLocalCallDependency(CallSite(C));
- // FIXME: call/call dependencies for readonly calls should return def, not
- // clobber! Move the checking logic to MemDep!
- CallInst* cdep = 0;
-
- // Check to see if we have a single dominating call instruction that is
- // identical to C.
- for (unsigned i = 0, e = deps.size(); i != e; ++i) {
- const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
- // Ignore non-local dependencies.
- if (I->second.isNonLocal())
- continue;
+uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
- // We don't handle non-depedencies. If we already have a call, reject
- // instruction dependencies.
- if (I->second.isClobber() || cdep != 0) {
- cdep = 0;
- break;
- }
+ MemDepResult local_dep = MD->getDependency(C);
- CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
- // FIXME: All duplicated with non-local case.
- if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
- cdep = NonLocalDepCall;
- continue;
- }
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
- cdep = 0;
- break;
- }
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
- if (!cdep) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ if (local_cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- if (cdep->getNumOperands() != C->getNumOperands()) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
- }
for (unsigned i = 1; i < C->getNumOperands(); ++i) {
uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
if (c_vn != cd_vn) {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
}
- uint32_t v = lookup_or_add(cdep);
- valueNumbering.insert(std::make_pair(V, v));
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering[C] = v;
return v;
-
- } else {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
- return nextValueNumber++;
}
- } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
- Expression e = create_expression(BO);
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: call/call dependencies for readonly calls should return def, not
+ // clobber! Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+ // Ignore non-local dependencies.
+ if (I->second.isNonLocal())
+ continue;
- return nextValueNumber++;
- }
- } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
- Expression e = create_expression(C);
+    // We don't handle non-dependencies.  If we already have a call, reject
+ // instruction dependencies.
+ if (I->second.isClobber() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
- return nextValueNumber++;
+ cdep = 0;
+ break;
}
- } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
- Expression e = create_expression(U);
-
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ if (!cdep) {
+ valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
- Expression e = create_expression(U);
-
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ if (cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
- Expression e = create_expression(U);
-
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
- return nextValueNumber++;
+ for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+ uint32_t c_vn = lookup_or_add(C->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
}
- } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
- Expression e = create_expression(U);
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering[C] = v;
+ return v;
- return nextValueNumber++;
- }
- } else if (CastInst* U = dyn_cast<CastInst>(V)) {
- Expression e = create_expression(U);
-
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
-
- return nextValueNumber++;
- }
- } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
- Expression e = create_expression(U);
+ } else {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+}
- DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
- if (EI != expressionNumbering.end()) {
- valueNumbering.insert(std::make_pair(V, EI->second));
- return EI->second;
- } else {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value *V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
- return nextValueNumber++;
- }
- } else {
- valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ if (!isa<Instruction>(V)) {
+ valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or :
+ case Instruction::Xor:
+ exp = create_expression(cast<BinaryOperator>(I));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ exp = create_expression(cast<CmpInst>(I));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ exp = create_expression(cast<CastInst>(I));
+ break;
+ case Instruction::Select:
+ exp = create_expression(cast<SelectInst>(I));
+ break;
+ case Instruction::ExtractElement:
+ exp = create_expression(cast<ExtractElementInst>(I));
+ break;
+ case Instruction::InsertElement:
+ exp = create_expression(cast<InsertElementInst>(I));
+ break;
+ case Instruction::ShuffleVector:
+ exp = create_expression(cast<ShuffleVectorInst>(I));
+ break;
+ case Instruction::ExtractValue:
+ exp = create_expression(cast<ExtractValueInst>(I));
+ break;
+ case Instruction::InsertValue:
+ exp = create_expression(cast<InsertValueInst>(I));
+ break;
+ case Instruction::GetElementPtr:
+ exp = create_expression(cast<GetElementPtrInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+ return e;
}
/// lookup - Returns the value number of the specified value. Fails if
@@ -1557,15 +1530,18 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
const TargetData *TD = 0;
- if (StoredVal->getType() != L->getType() &&
- (TD = getAnalysisIfAvailable<TargetData>())) {
- StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *TD);
- if (StoredVal == 0)
+ if (StoredVal->getType() != L->getType()) {
+ if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+ else
return false;
-
- DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
- << '\n' << *L << "\n\n\n");
}
// Remove it!
@@ -1584,14 +1560,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
const TargetData *TD = 0;
- if (DepLI->getType() != L->getType() &&
- (TD = getAnalysisIfAvailable<TargetData>())) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
- if (AvailableVal == 0)
- return false;
+ if (DepLI->getType() != L->getType()) {
+ if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+ if (AvailableVal == 0)
+ return false;
- DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
- << "\n" << *L << "\n\n\n");
+ DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
+ else
+ return false;
}
// Remove it!
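
The rewritten lookup_or_add paths above all funnel into one small idiom: DenseMap::operator[] value-initializes a missing entry to 0, so a zero value number doubles as "not yet numbered" (value numbers start at 1). A self-contained sketch of that idiom (lookupOrAssign is hypothetical):

#include "llvm/ADT/DenseMap.h"
#include <stdint.h>

static llvm::DenseMap<const void *, uint32_t> expressionNumbering;
static uint32_t nextValueNumber = 1;

static uint32_t lookupOrAssign(const void *Expr) {
  uint32_t &E = expressionNumbering[Expr]; // inserts 0 if Expr was absent
  if (!E)
    E = nextValueNumber++;                 // number each expression once
  return E;
}
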
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index f635af3..b41b5d4 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -6300,25 +6300,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
- case Instruction::Malloc:
- // If we have (malloc != null), and if the malloc has a single use, we
- // can assume it is successful and remove the malloc.
- if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
- Worklist.Add(LHSI);
- return ReplaceInstUsesWith(I,
- ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
- }
- break;
case Instruction::Call:
// If we have (malloc != null), and if the malloc has a single use, we
// can assume it is successful and remove the malloc.
if (isMalloc(LHSI) && LHSI->hasOneUse() &&
isa<ConstantPointerNull>(RHSC)) {
- Worklist.Add(LHSI);
- return ReplaceInstUsesWith(I,
+ // Need to explicitly erase malloc call here, instead of adding it to
+ // Worklist, because it won't get DCE'd from the Worklist since
+ // isInstructionTriviallyDead() returns false for function calls.
+ // It is OK to replace LHSI/MallocCall with Undef because the
+ // instruction that uses it will be erased via Worklist.
+ if (extractMallocCall(LHSI)) {
+ LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
+ EraseInstFromFunction(*LHSI);
+ return ReplaceInstUsesWith(I,
ConstantInt::get(Type::getInt1Ty(*Context),
!I.isTrueWhenEqual()));
+ }
+ if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
+ if (MallocCall->hasOneUse()) {
+ MallocCall->replaceAllUsesWith(
+ UndefValue::get(MallocCall->getType()));
+ EraseInstFromFunction(*MallocCall);
+ Worklist.Add(LHSI); // The malloc's bitcast use.
+ return ReplaceInstUsesWith(I,
+ ConstantInt::get(Type::getInt1Ty(*Context),
+ !I.isTrueWhenEqual()));
+ }
}
break;
}
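
The InstCombine hunk above folds `malloc(...) != null` to a constant when the malloc's only use is the comparison: the allocation is assumed to succeed, so the compare becomes `true` for `!=` (and `false` for `==`), and the now-dead call is erased by hand because isInstructionTriviallyDead() refuses to delete calls. A standalone model of the decision logic (the enum and helper are illustrative):

    #include <cstdio>

    enum Pred { EQ, NE };

    // Models folding "icmp pred (malloc ...), null" when the malloc has one use.
    // Returns true and sets Result if the compare folds to a constant.
    bool foldMallocNullCompare(Pred P, bool mallocHasOneUse, bool &Result) {
      if (!mallocHasOneUse)
        return false;     // other users might observe an actual null return
      Result = (P == NE); // assume allocation succeeds: ptr != null is true
      return true;        // caller must also erase the malloc call itself
    }

    int main() {
      bool R;
      if (foldMallocNullCompare(NE, true, R))
        std::printf("folded to %s\n", R ? "true" : "false"); // folded to true
    }
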
@@ -7809,11 +7817,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
}
- AllocationInst *New;
- if (isa<MallocInst>(AI))
- New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
- else
- New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ AllocationInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
@@ -9270,14 +9274,44 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
return Changed ? &SI : 0;
}
-/// isDefinedInBB - Return true if the value is an instruction defined in the
-/// specified basicblock.
-static bool isDefinedInBB(const Value *V, const BasicBlock *BB) {
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
const Instruction *I = dyn_cast<Instruction>(V);
- return I != 0 && I->getParent() == BB;
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator based analysis, punt.
+ return false;
}
-
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -9489,16 +9523,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return FoldI;
}
- // See if we can fold the select into a phi node. The true/false values have
- // to be live in the predecessor blocks. If they are instructions in SI's
- // block, we can't map to the predecessor.
- if (isa<PHINode>(SI.getCondition()) &&
- (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) ||
- isa<PHINode>(SI.getTrueValue())) &&
- (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) ||
- isa<PHINode>(SI.getFalseValue())))
- if (Instruction *NV = FoldOpIntoPhi(SI))
- return NV;
+  // See if we can fold the select into a phi node if the condition is a PHI.
+ if (isa<PHINode>(SI.getCondition()))
+ // The true/false values have to be live in the PHI predecessor's blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
if (BinaryOperator::isNot(CondVal)) {
SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
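
The rewritten guard above is what makes the select-into-PHI fold legal: each select operand must be computable at the end of every predecessor of the condition PHI's block, because FoldOpIntoPhi materializes one select per incoming edge. A standalone sketch of the legality test's three cases (the flags are illustrative stand-ins for the IR queries):

    #include <cstdio>

    // Where is a value defined, relative to the condition PHI's block?
    struct Val { bool isInstruction; bool isPHIInCondBlock; bool definedInCondBlock; };

    // Mirrors CanSelectOperandBeMappingIntoPredBlock's three tests.
    bool canMapIntoPreds(const Val &V, bool selectInCondBlock) {
      if (!V.isInstruction) return true;    // constants/arguments always map
      if (V.isPHIInCondBlock) return true;  // sibling PHI: take its incoming values
      if (selectInCondBlock && !V.definedInCondBlock)
        return true;  // defined outside the PHI block yet dominating the select,
                      // hence live at the end of every predecessor
      return false;   // would need dominator analysis; punt
    }

    int main() {
      Val C{false, false, false};                    // a constant operand
      std::printf("%d\n", canMapIntoPreds(C, true)); // 1
    }
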
@@ -11213,15 +11244,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
const Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocationInst *New = 0;
-
- // Create and insert the replacement instruction...
- if (isa<MallocInst>(AI))
- New = Builder->CreateMalloc(NewTy, 0, AI.getName());
- else {
- assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
- New = Builder->CreateAlloca(NewTy, 0, AI.getName());
- }
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ AllocationInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
@@ -11294,12 +11318,6 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
}
}
- // Change free(malloc) into nothing, if the malloc has a single use.
- if (MallocInst *MI = dyn_cast<MallocInst>(Op))
- if (MI->hasOneUse()) {
- EraseInstFromFunction(FI);
- return EraseInstFromFunction(*MI);
- }
if (isMalloc(Op)) {
if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
if (Op->hasOneUse() && CI->hasOneUse()) {
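
With MallocInst gone, the free(malloc(x)) elimination above has to look through malloc calls, and bitcasts of them, via isMalloc and extractMallocCallFromBitCast; it still only fires when the pointer reaches the free through single-use links, so no other user can observe the allocation. A standalone model of that gate:

    #include <cstdio>

    // Models "free(malloc(x)) -> nothing": legal only when both the bitcast
    // (if any) and the malloc call itself are single-use.
    bool canElideFreeOfMalloc(bool ptrIsMallocOrItsBitCast,
                              bool castHasOneUse, bool callHasOneUse) {
      return ptrIsMallocOrItsBitCast && castHasOneUse && callHasOneUse;
    }

    int main() {
      std::printf("%d\n", canElideFreeOfMalloc(true, true, true)); // 1: erase both
    }
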
@@ -11327,40 +11345,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Value *CastOp = CI->getOperand(0);
LLVMContext *Context = IC.getContext();
- if (TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
- // Instead of loading constant c string, use corresponding integer value
- // directly if string length is small enough.
- std::string Str;
- if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
- unsigned len = Str.length();
- const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
- unsigned numBits = Ty->getPrimitiveSizeInBits();
- // Replace LI with immediate integer store.
- if ((numBits >> 3) == len + 1) {
- APInt StrVal(numBits, 0);
- APInt SingleChar(numBits, 0);
- if (TD->isLittleEndian()) {
- for (signed i = len-1; i >= 0; i--) {
- SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
- StrVal = (StrVal << 8) | SingleChar;
- }
- } else {
- for (unsigned i = 0; i < len; i++) {
- SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
- StrVal = (StrVal << 8) | SingleChar;
- }
- // Append NULL at the end.
- SingleChar = 0;
- StrVal = (StrVal << 8) | SingleChar;
- }
- Value *NL = ConstantInt::get(*Context, StrVal);
- return IC.ReplaceInstUsesWith(LI, NL);
- }
- }
- }
- }
-
const PointerType *DestTy = cast<PointerType>(CI->getType());
const Type *DestPTy = DestTy->getElementType();
if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
@@ -11380,7 +11364,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
Value *Idxs[2];
- Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ Idxs[1] = Idxs[0];
CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
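
For reference, the block deleted above folded a load of a short constant C string straight into an integer: the bytes, plus the terminating NUL, were packed into an APInt in the target's byte order, provided the loaded integer was exactly one byte wider than the string. A standalone sketch of that packing, using uint64_t in place of APInt:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Packs str plus its NUL terminator into an integer of numBits bits,
    // honoring endianness, as the deleted InstCombineLoadCast code did.
    // Returns false unless the integer is exactly len+1 bytes wide.
    bool packString(const std::string &str, unsigned numBits, bool littleEndian,
                    uint64_t &out) {
      unsigned len = str.length();
      if (numBits / 8 != len + 1 || numBits > 64)
        return false;
      out = 0;
      if (littleEndian) {
        for (int i = len - 1; i >= 0; --i)  // high byte first; NUL is the implicit 0
          out = (out << 8) | (uint8_t)str[i];
      } else {
        for (unsigned i = 0; i < len; ++i)
          out = (out << 8) | (uint8_t)str[i];
        out <<= 8;                          // append the NUL at the low end
      }
      return true;
    }

    int main() {
      uint64_t v;
      if (packString("ab", 24, /*littleEndian=*/true, v))
        std::printf("0x%06llx\n", (unsigned long long)v); // 0x006261, i.e. "ab\0"
    }
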
@@ -11436,6 +11421,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
return ReplaceInstUsesWith(LI, AvailableVal);
+ // load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
const Value *GEPI0 = GEPI->getOperand(0);
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -11450,60 +11436,24 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
}
- if (Constant *C = dyn_cast<Constant>(Op)) {
- // load null/undef -> undef
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<UndefValue>(C) ||
- (C->isNullValue() && LI.getPointerAddressSpace() == 0)) {
- // Insert a new store to null instruction before the load to indicate that
- // this code is not reachable. We do this instead of inserting an
- // unreachable instruction directly because we cannot modify the CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
-
- // Instcombine load (constant global) into the value loaded.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- return ReplaceInstUsesWith(LI, GV->getInitializer());
-
- // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Constant *V =
- ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
- return ReplaceInstUsesWith(LI, V);
- if (CE->getOperand(0)->isNullValue()) {
- // Insert a new store to null instruction before the load to indicate
- // that this code is not reachable. We do this instead of inserting
- // an unreachable instruction directly because we cannot modify the
- // CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
-
- } else if (CE->isCast()) {
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
- return Res;
- }
- }
- }
-
- // If this load comes from anywhere in a constant global, and if the global
- // is all undef or zero, we know what it loads.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (GV->getInitializer()->isNullValue())
- return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
- else if (isa<UndefValue>(GV->getInitializer()))
- return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many other simplifications, and
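
The rewritten block above keeps only the generic pieces of the old constant-pointer handling: a load of undef, or of null in address space 0, marks unreachable code, and since InstCombine may not modify the CFG it plants a `store undef, null` before the load as a trap marker instead of an `unreachable` instruction, then replaces the load with undef. The folds through constant globals and constant GEPs were dropped from this path. A standalone model of the decision:

    #include <cstdio>

    // Models visitLoadInst's unreachable-code test after this patch. Returns
    // true if the load should be replaced by undef (with a store-to-null
    // inserted first as a marker, since the CFG cannot be changed here).
    bool loadIsProvablyUnreachable(bool ptrIsUndef, bool ptrIsNull,
                                   unsigned addrSpace) {
      if (ptrIsUndef) return true;
      // Null only traps in address space 0; other spaces may map address 0.
      return ptrIsNull && addrSpace == 0;
    }

    int main() {
      std::printf("%d %d\n",
                  loadIsProvablyUnreachable(false, true, 0),  // 1
                  loadIsProvablyUnreachable(false, true, 1)); // 0
    }
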
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index f6de362..223d2b9 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -138,7 +138,6 @@ namespace {
void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
- unsigned getLoopUnswitchCost(Value *LIC);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock);
void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
@@ -400,25 +399,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
return true;
}
-/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if
-/// we choose to unswitch current loop on the specified value.
-///
-unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
- // If the condition is trivial, always unswitch. There is no code growth for
- // this case.
- if (IsTrivialUnswitchCondition(LIC))
- return 0;
-
- // FIXME: This is overly conservative because it does not take into
- // consideration code simplification opportunities.
- CodeMetrics Metrics;
- for (Loop::block_iterator I = currentLoop->block_begin(),
- E = currentLoop->block_end();
- I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
- return Metrics.NumInsts;
-}
-
/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
@@ -427,24 +407,35 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
Function *F = loopHeader->getParent();
+ // If the condition is trivial, always unswitch. There is no code growth for
+ // this case.
+ if (!IsTrivialUnswitchCondition(LoopCond)) {
+ // Check to see if it would be profitable to unswitch current loop.
- // Check to see if it would be profitable to unswitch current loop.
- unsigned Cost = getLoopUnswitchCost(LoopCond);
-
- // Do not do non-trivial unswitch while optimizing for size.
- if (Cost && OptimizeForSize)
- return false;
- if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize))
- return false;
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
- if (Cost > Threshold) {
- // FIXME: this should estimate growth by the amount of code shared by the
- // resultant unswitched loops.
- //
- DEBUG(errs() << "NOT unswitching loop %"
- << currentLoop->getHeader()->getName() << ", cost too high: "
- << currentLoop->getBlocks().size() << "\n");
- return false;
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+ if (Metrics.NumInsts > Threshold ||
+ Metrics.NumBlocks * 5 > Threshold) {
+ DEBUG(errs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n");
+ return false;
+ }
}
Constant *CondVal;
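
The restructured profitability check above inlines the old getLoopUnswitchCost: trivial conditions always unswitch; otherwise size-optimized builds bail out, and the loop is measured directly. The two limits are worth spelling out: with a threshold T, the loop is rejected if it has more than T instructions or more than T/5 basic blocks, the latter guarding against blowup on branch-heavy loops. A standalone model:

    #include <cstdio>

    // Models UnswitchIfProfitable's size heuristic after this patch.
    bool profitableToUnswitch(bool trivialCondition, bool optimizeForSize,
                              unsigned numInsts, unsigned numBlocks,
                              unsigned threshold) {
      if (trivialCondition) return true; // no code growth at all
      if (optimizeForSize) return false; // non-trivial unswitch duplicates the loop
      // Reject big loops outright; 5 budget units per block is admittedly ad hoc.
      return numInsts <= threshold && numBlocks * 5 <= threshold;
    }

    int main() {
      std::printf("%d\n", profitableToUnswitch(false, false, 40, 3, 50));  // 1
      std::printf("%d\n", profitableToUnswitch(false, false, 40, 20, 50)); // 0: too many blocks
    }
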
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e6ffac2..af29f97 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -121,7 +121,6 @@ static bool isUnmovableInstruction(Instruction *I) {
if (I->getOpcode() == Instruction::PHI ||
I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load ||
- I->getOpcode() == Instruction::Malloc ||
I->getOpcode() == Instruction::Invoke ||
(I->getOpcode() == Instruction::Call &&
!isa<DbgInfoIntrinsic>(I)) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index b5edf4e..b745097 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1229,7 +1229,10 @@ CallOverdefined:
TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0));
if (TMRVI == TrackedMultipleRetVals.end())
goto CallOverdefined;
-
+
+  // Need to mark the call overdefined; otherwise it stays undefined, which
+  // creates "extractvalue undef, <idx>" uses.
+ markOverdefined(I);
// If we are tracking this callee, propagate the return values of the call
// into this call site. We do this by walking all the uses. Single-index
// ExtractValueInst uses can be tracked; anything more complicated is
@@ -1271,7 +1274,6 @@ CallOverdefined:
}
}
-
void SCCPSolver::Solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty() ||
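
The one-line SCCP fix above is subtle: when a call's return is tracked via TrackedMultipleRetVals, the call instruction itself was left in the "undefined" lattice state, so its extractvalue users folded against an undef aggregate. Marking the call overdefined first keeps the lattice sound while the per-index return values propagate. A toy lattice model of why the order matters:

    #include <cstdio>

    enum Lattice { Undefined, Constant, Overdefined };

    // Toy model: what an extractvalue user sees for a tracked multi-return call.
    Lattice extractValueSees(Lattice callState, Lattice trackedComponent) {
      // If the aggregate itself is still Undefined, the user folds to
      // "extractvalue undef, <idx>" regardless of the tracked component.
      if (callState == Undefined) return Undefined;
      return trackedComponent;
    }

    int main() {
      // Before the fix: call left Undefined -> bogus undef result.
      std::printf("%d\n", extractValueSees(Undefined, Constant));   // 0
      // After the fix: call marked Overdefined -> component value used.
      std::printf("%d\n", extractValueSees(Overdefined, Constant)); // 1
    }
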
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 4931ab3..35907fd 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -425,14 +425,26 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
if (L) {
if (IsLoopEntry) {
- if (Loop *PredLoop = LI->getLoopFor(Preds[0])) {
- // Add the new block to the nearest enclosing loop (and not an
- // adjacent loop).
- while (PredLoop && !PredLoop->contains(BB))
- PredLoop = PredLoop->getParentLoop();
- if (PredLoop)
- PredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- }
+ // Add the new block to the nearest enclosing loop (and not an
+ // adjacent loop). To find this, examine each of the predecessors and
+ // determine which loops enclose them, and select the most-nested loop
+ // which contains the loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (unsigned i = 0; i != NumPreds; ++i)
+ if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
+ // Seek a loop which actually contains the block being split (to
+ // avoid adjacent loops).
+ while (PredLoop && !PredLoop->contains(BB))
+ PredLoop = PredLoop->getParentLoop();
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop &&
+ PredLoop->contains(BB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
} else {
L->addBasicBlockToLoop(NewBB, LI->getBase());
if (SplitMakesNewLoopHeader)
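
The old code above consulted only Preds[0] when deciding which loop should own the new block; with predecessors coming from differently nested loops, that could pick an outer loop or none at all. The new loop scans every predecessor, walks each one's loop chain up to the first loop that also contains BB, and keeps the deepest such loop. A standalone model of that selection:

    #include <cstdio>
    #include <vector>

    struct Loop {
      Loop *parent;
      unsigned depth;
      bool containsBB; // does this loop contain the block being split?
    };

    // Mirrors the innermost-enclosing-loop search in SplitBlockPredecessors.
    Loop *pickLoopForNewBlock(const std::vector<Loop *> &predLoops) {
      Loop *innermost = nullptr;
      for (Loop *L : predLoops) {
        while (L && !L->containsBB) // skip adjacent loops that don't enclose BB
          L = L->parent;
        if (L && (!innermost || innermost->depth < L->depth))
          innermost = L;            // keep the most deeply nested candidate
      }
      return innermost;
    }

    int main() {
      Loop outer{nullptr, 1, true}, inner{&outer, 2, true}, adjacent{nullptr, 1, false};
      std::vector<Loop *> preds{&adjacent, &inner};
      std::printf("depth %u\n", pickLoopForNewBlock(preds)->depth); // depth 2
    }
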
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d00d69..619c939 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -444,18 +444,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
if (InlinedFunctionInfo.ContainsDynamicAllocas) {
Module *M = Caller->getParent();
// Get the two intrinsics we care about.
- Constant *StackSave, *StackRestore;
- StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
- StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
// If we are preserving the callgraph, add edges to the stacksave/restore
// functions for the calls we insert.
CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
if (CG) {
- // We know that StackSave/StackRestore are Function*'s, because they are
- // intrinsics which must have the right types.
- StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave));
- StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
+ StackSaveCGN = CG->getOrInsertFunction(StackSave);
+ StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
CallerNode = (*CG)[Caller];
}
@@ -480,7 +477,8 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
for (Function::iterator BB = FirstNewBlock, E = Caller->end();
BB != E; ++BB)
if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- CallInst::Create(StackRestore, SavedPtr, "", UI);
+ CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
+ if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
++NumStackRestores;
}
}
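
Two things change above: the stacksave/stackrestore declarations are typed as Function* directly (intrinsic declarations are always functions, so the casts were noise), and, more importantly, the stackrestore calls inserted before `unwind` terminators now get call graph edges, which the previous code forgot, leaving the call graph stale whenever an inlined callee had dynamic allocas on an unwind path. The restored invariant, as a standalone model:

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct CallGraphStub {
      std::vector<std::pair<const char *, const char *>> edges;
      void addEdge(const char *caller, const char *callee) {
        edges.push_back({caller, callee});
      }
    };

    // Models the invariant the fix restores: every call instruction the inliner
    // creates must be mirrored by an edge when a call graph is maintained.
    void insertStackRestore(CallGraphStub *CG, const char *caller) {
      // ... the CallInst::Create(StackRestore, SavedPtr, "", UI) would go here ...
      if (CG) CG->addEdge(caller, "llvm.stackrestore");
    }

    int main() {
      CallGraphStub CG;
      insertStackRestore(&CG, "caller");           // normal-return path
      insertStackRestore(&CG, "caller");           // unwind path (previously missed)
      std::printf("%zu edges\n", CG.edges.size()); // 2 edges
    }
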
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 1fa51a3..7f11acf 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -33,11 +33,11 @@ namespace {
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
AI != AE; ++AI)
if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
- AI->setName("tmp");
+ AI->setName("arg");
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BB->hasName())
- BB->setName("BB");
+ BB->setName("bb");
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index f26d7c1..9c9113d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -1,4 +1,4 @@
-//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//===- LowerAllocations.cpp - Reduce free insts to calls ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,18 +29,15 @@ using namespace llvm;
STATISTIC(NumLowered, "Number of allocations lowered");
namespace {
- /// LowerAllocations - Turn malloc and free instructions into @malloc and
- /// @free calls.
+ /// LowerAllocations - Turn free instructions into @free calls.
///
class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
Constant *FreeFunc; // Functions in the module we are processing
// Initialized by doInitialization
- bool LowerMallocArgToInteger;
public:
static char ID; // Pass ID, replacement for typeid
- explicit LowerAllocations(bool LowerToInt = false)
- : BasicBlockPass(&ID), FreeFunc(0),
- LowerMallocArgToInteger(LowerToInt) {}
+ explicit LowerAllocations()
+ : BasicBlockPass(&ID), FreeFunc(0) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetData>();
@@ -54,7 +51,7 @@ namespace {
}
/// doPassInitialization - For the lower allocations pass, this ensures that
- /// a module contains a declaration for a malloc and a free function.
+ /// a module contains a declaration for a free function.
///
bool doInitialization(Module &M);
@@ -76,13 +73,13 @@ X("lowerallocs", "Lower allocations from instructions to calls");
// Publicly exposed interface to pass...
const PassInfo *const llvm::LowerAllocationsID = &X;
// createLowerAllocationsPass - Interface to this file...
-Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
- return new LowerAllocations(LowerMallocArgToInteger);
+Pass *llvm::createLowerAllocationsPass() {
+ return new LowerAllocations();
}
// doInitialization - For the lower allocations pass, this ensures that a
-// module contains a declaration for a malloc and a free function.
+// module contains a declaration for a free function.
//
// This function is always successful.
//
@@ -102,25 +99,9 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
BasicBlock::InstListType &BBIL = BB.getInstList();
- const TargetData &TD = getAnalysis<TargetData>();
- const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());
-
- // Loop over all of the instructions, looking for malloc or free instructions
+ // Loop over all of the instructions, looking for free instructions
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
- if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
- Value *ArraySize = MI->getOperand(0);
- if (ArraySize->getType() != IntPtrTy)
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
- false /*ZExt*/, "", I);
- Value *MCast = CallInst::CreateMalloc(I, IntPtrTy,
- MI->getAllocatedType(), ArraySize);
-
- // Replace all uses of the old malloc inst with the cast inst
- MI->replaceAllUsesWith(MCast);
- I = --BBIL.erase(I); // remove and delete the malloc instr...
- Changed = true;
- ++NumLowered;
- } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
+ if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
Value *PtrCast =
new BitCastInst(FI->getOperand(0),
Type::getInt8PtrTy(BB.getContext()), "", I);
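
With malloc lowering gone (malloc is now emitted as a call from the start), the pass above only rewrites FreeInst: the operand is bitcast to i8* and handed to @free. A standalone model of the shape of the rewrite, printing the IR it corresponds to:

    #include <cstdio>
    #include <string>

    // Models what LowerAllocations now emits for "free <ty>* %p":
    //   %cast = bitcast <ty>* %p to i8*
    //   call void @free(i8* %cast)
    std::string lowerFree(const std::string &ptrName, const std::string &ptrTy) {
      return "  %cast = bitcast " + ptrTy + " " + ptrName + " to i8*\n"
             "  call void @free(i8* %cast)\n";
    }

    int main() {
      std::printf("%s", lowerFree("%p", "i32*").c_str());
    }
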
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 780ee26..8a07c35 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -48,7 +48,7 @@ void SSAUpdater::Initialize(Value *ProtoValue) {
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
-
+
if (IPI == 0)
IPI = new IncomingPredInfoTy();
else
@@ -104,12 +104,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// GetValueAtEndOfBlock to do our work.
if (!getAvailableVals(AV).count(BB))
return GetValueAtEndOfBlock(BB);
-
+
// Otherwise, we have the hard case. Get the live-in values for each
// predecessor.
SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
Value *SingularValue = 0;
-
+
// We can get our predecessor info by walking the pred_iterator list, but it
// is relatively slow. If we already have PHI nodes in this block, walk one
// of them to get the predecessor list instead.
@@ -118,7 +118,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
Value *PredVal = GetValueAtEndOfBlock(PredBB);
PredValues.push_back(std::make_pair(PredBB, PredVal));
-
+
// Compute SingularValue.
if (i == 0)
SingularValue = PredVal;
@@ -131,7 +131,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
BasicBlock *PredBB = *PI;
Value *PredVal = GetValueAtEndOfBlock(PredBB);
PredValues.push_back(std::make_pair(PredBB, PredVal));
-
+
// Compute SingularValue.
if (isFirstPred) {
SingularValue = PredVal;
@@ -140,25 +140,25 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
SingularValue = 0;
}
}
-
+
// If there are no predecessors, just return undef.
if (PredValues.empty())
return UndefValue::get(PrototypeValue->getType());
-
+
// Otherwise, if all the merged values are the same, just use it.
if (SingularValue != 0)
return SingularValue;
-
+
// Otherwise, we do need a PHI: insert one now.
PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
PrototypeValue->getName(),
&BB->front());
InsertedPHI->reserveOperandSpace(PredValues.size());
-
+
// Fill in all the predecessors of the PHI.
for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
-
+
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -168,7 +168,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
-
+
DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
return InsertedPHI;
}
@@ -177,11 +177,14 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
/// which use their value in the corresponding predecessor.
void SSAUpdater::RewriteUse(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UseBB = User->getParent();
- if (PHINode *UserPN = dyn_cast<PHINode>(User))
- UseBB = UserPN->getIncomingBlock(U);
- U.set(GetValueInMiddleOfBlock(UseBB));
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ U.set(V);
}
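
The RewriteUse change above is more than a refactor: a use in a PHI node consumes the value on the edge from its incoming block, so the right question is "what is the value at the end of that predecessor?" (GetValueAtEndOfBlock), not "what is it in the middle of that block?". Asking the middle-of-block form could insert a needless merge PHI in the predecessor itself. A standalone model of the dispatch:

    #include <cstdio>

    enum Query { AtEndOfBlock, InMiddleOfBlock };

    // Mirrors SSAUpdater::RewriteUse's choice of lookup after this patch.
    Query queryForUse(bool userIsPHI) {
      // A PHI reads the value live-out of its incoming block; any other user
      // needs the value where it sits, possibly merging live-in values.
      return userIsPHI ? AtEndOfBlock : InMiddleOfBlock;
    }

    int main() {
      std::printf("%d %d\n", queryForUse(true), queryForUse(false)); // 0 1
    }
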
@@ -192,11 +195,11 @@ void SSAUpdater::RewriteUse(Use &U) {
///
Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
-
+
// Query AvailableVals by doing an insertion of null.
std::pair<AvailableValsTy::iterator, bool> InsertRes =
AvailableVals.insert(std::make_pair(BB, WeakVH()));
-
+
// Handle the case when the insertion fails because we have already seen BB.
if (!InsertRes.second) {
// If the insertion failed, there are two cases. The first case is that the
@@ -204,7 +207,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
// return the value.
if (InsertRes.first->second != 0)
return InsertRes.first->second;
-
+
// Otherwise, if the value we find is null, then the value is not yet
// known but is being computed elsewhere in our recursion. This means
// that we have a cycle. Handle this by inserting a PHI node and returning
@@ -214,7 +217,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
&BB->front());
}
-
+
// Okay, the value isn't in the map and we just inserted a null in the entry
// to indicate that we're processing the block. Since we have no idea what
// value is in this block, we have to recurse through our predecessors.
@@ -225,13 +228,13 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
// of the recursion, just use IncomingPredInfo as an explicit stack.
IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-
+
// As we're walking the predecessors, keep track of whether they are all
// producing the same value. If so, this value will capture it, if not, it
// will get reset to null. We distinguish the no-predecessor case explicitly
// below.
TrackingVH<Value> SingularValue;
-
+
// We can get our predecessor info by walking the pred_iterator list, but it
// is relatively slow. If we already have PHI nodes in this block, walk one
// of them to get the predecessor list instead.
@@ -240,7 +243,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-
+
// Compute SingularValue.
if (i == 0)
SingularValue = PredVal;
@@ -253,7 +256,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
BasicBlock *PredBB = *PI;
Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-
+
// Compute SingularValue.
if (isFirstPred) {
SingularValue = PredVal;
@@ -262,19 +265,19 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
SingularValue = 0;
}
}
-
+
// If there are no predecessors, then we must have found an unreachable block;
// just return 'undef'. Since there are no predecessors, InsertRes must not
// be invalidated.
if (IncomingPredInfo.size() == FirstPredInfoEntry)
return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
-
+
/// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
/// this block is involved in a loop, a no-entry PHI node will have been
/// inserted as InsertedVal. Otherwise, we'll still have the null we inserted
/// above.
TrackingVH<Value> &InsertedVal = AvailableVals[BB];
-
+
// If all the predecessor values are the same then we don't need to insert a
// PHI. This is the simple and common case.
if (SingularValue) {
@@ -291,31 +294,31 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
} else {
InsertedVal = SingularValue;
}
-
+
// Drop the entries we added in IncomingPredInfo to restore the stack.
IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
IncomingPredInfo.end());
return InsertedVal;
}
-
+
// Otherwise, we do need a PHI: insert one now if we don't already have one.
if (InsertedVal == 0)
InsertedVal = PHINode::Create(PrototypeValue->getType(),
PrototypeValue->getName(), &BB->front());
-
+
PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
-
+
// Fill in all the predecessors of the PHI.
for (IncomingPredInfoTy::iterator I =
IncomingPredInfo.begin()+FirstPredInfoEntry,
E = IncomingPredInfo.end(); I != E; ++I)
InsertedPHI->addIncoming(I->second, I->first);
-
+
// Drop the entries we added in IncomingPredInfo to restore the stack.
IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
IncomingPredInfo.end());
-
+
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -324,12 +327,10 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
InsertedVal = ConstVal;
} else {
DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
-
+
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
}
-
+
return InsertedVal;
}
-
-
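
GetValueAtEndOfBlockInternal, shown above, is the recursive core of SSAUpdater: it inserts a null marker for BB before recursing into predecessors, so re-entering BB during the walk (a loop in the CFG) is detected by finding the null and answered with a fresh, operand-less PHI that gets filled in after the recursion unwinds; IncomingPredInfo doubles as an explicit stack so nested queries don't clobber each other. A standalone model of the marker trick on a toy two-block loop (the merge step is elided; real code builds a PHI from the predecessor values):

    #include <cstdio>
    #include <map>
    #include <vector>

    struct Block { std::vector<Block *> preds; };

    std::map<Block *, int> avail; // 0 = "in progress" marker, like the null WeakVH
    int nextPhi = 100;            // toy stand-in for a freshly created PHI node

    // Models the cycle detection in GetValueAtEndOfBlockInternal.
    int valueAtEnd(Block *BB, std::map<Block *, int> &defs) {
      auto ins = avail.insert({BB, 0});  // plant the marker
      if (!ins.second)                   // already visited or in progress
        return ins.first->second ? ins.first->second
                                 : (ins.first->second = nextPhi++); // cycle: PHI
      if (defs.count(BB))
        return avail[BB] = defs[BB];     // value defined in this block
      int v = -1;
      for (Block *P : BB->preds)
        v = valueAtEnd(P, defs);         // real code merges these via a PHI
      return avail[BB] = v;
    }

    int main() {
      Block header, latch;               // header <-> latch loop
      header.preds = {&latch};
      latch.preds = {&header};
      std::map<Block *, int> defs;       // no defs: a pure cycle
      std::printf("%d\n", valueAtEnd(&header, defs)); // synthesized PHI id (100)
    }
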