author    rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
committer rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
commit    d2e985fd323c167e20f77b045a1d99ad166e65db (patch)
tree      6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/Transforms
parent    ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff)
Update LLVM to r89205.
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/CMakeLists.txt                |   2
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                   | 261
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                     |  46
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp               |   4
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp                |  54
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp                     |  37
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt               |   1
-rw-r--r--  lib/Transforms/Scalar/CondPropagate.cpp            | 289
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp             |   2
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp     | 193
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                      |  40
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp           |   4
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp     | 631
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp            | 735
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                     |   9
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp             |   4
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp           |   4
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp             |  13
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp       | 876
-rw-r--r--  lib/Transforms/Scalar/LoopUnroll.cpp               | 177
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp             |   7
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp              |  36
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                     |  11
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp                    |   6
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp                   |   4
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp         |  41
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp          |   3
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp |  32
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp           |   8
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp             |  71
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp            |   2
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp                     |  18
-rw-r--r--  lib/Transforms/Utils/Local.cpp                     | 223
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp              | 112
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp                |  16
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp               | 168
36 files changed, 2298 insertions, 1842 deletions
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
index b80d15b..917b745 100644
--- a/lib/Transforms/Hello/CMakeLists.txt
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_llvm_library( LLVMHello
+add_llvm_loadable_module( LLVMHello
Hello.cpp
)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 234d0ec..442f2fb 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,7 +20,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -245,8 +244,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
- LLVMContext &Context) {
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
if (!CI) return 0;
unsigned IdxV = CI->getZExtValue();
@@ -282,8 +280,7 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- LLVMContext &Context) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -304,11 +301,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
} else if (CE->getOpcode() == Instruction::BitCast &&
isa<PointerType>(CE->getType())) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
}
if (CE->use_empty()) {
@@ -322,11 +319,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -344,7 +341,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init, Context);
+ CleanupConstantGlobalUsers(V, Init);
return true;
}
}
@@ -469,8 +466,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
- LLVMContext &Context) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -492,11 +488,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(i), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -527,12 +521,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -554,7 +546,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
- Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
// Loop over all of the uses of the global, replacing the constantexpr geps,
// with smaller constantexpr geps or direct references.
@@ -678,8 +670,7 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
return true;
}
-static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
@@ -712,7 +703,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
- NewV, CI->getType()), Context);
+ NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
@@ -730,7 +721,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()), Context);
+ Idxs.size()));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -746,8 +737,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -758,7 +748,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
- Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
if (LI->use_empty()) {
LI->eraseFromParent();
@@ -789,7 +779,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
DEBUG(errs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0, Context);
+ CleanupConstantGlobalUsers(GV, 0);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -801,10 +791,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
+static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -824,11 +814,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Value* NElems,
- LLVMContext &Context,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
// CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
// returned NULL and we would not be here).
@@ -883,10 +872,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
- new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
- ConstantInt::getFalse(Context), GV->getName()+".init",
- GV->isThreadLocal());
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
@@ -905,8 +894,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- LV = ConstantInt::getFalse(Context); // X < null -> always false
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE:
@@ -928,7 +917,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
} else {
StoreInst *SI = cast<StoreInst>(GV->use_back());
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
SI->eraseFromParent();
}
@@ -949,9 +938,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, Context);
+ ConstantPropUsersOf(NewGV);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, Context);
+ ConstantPropUsersOf(RepValue);
return NewGV;
}
@@ -1153,8 +1142,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
@@ -1172,7 +1160,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
// a new Load of the scalarized global.
Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
InsertedScalarizedValues,
- PHIsToRewrite, Context),
+ PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
@@ -1196,16 +1184,14 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
Constant::getNullValue(NPtr->getType()),
@@ -1223,8 +1209,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
@@ -1256,8 +1241,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
}
@@ -1267,13 +1251,11 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
if (Load->use_empty()) {
@@ -1285,8 +1267,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value* NElems, LLVMContext &Context,
- TargetData *TD) {
+ Value* NElems, TargetData *TD) {
DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1315,14 +1296,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldGlobals.push_back(NGV);
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
- if (const StructType* ST = dyn_cast<StructType>(FieldTy))
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- const Type* IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems,
CI->getName() + ".f" + Twine(FieldNo));
- FieldMallocs.push_back(NMI);
+ CallInst *NCI = dyn_cast<BitCastInst>(NMI) ?
+ extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI);
+ FieldMallocs.push_back(NCI);
new StoreInst(NMI, NGV, CI);
}
@@ -1338,15 +1321,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// if (F1) { free(F1); F1 = 0; }
// if (F2) { free(F2); F2 = 0; }
// }
- Value *RunningOr = 0;
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+ ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
Constant::getNullValue(FieldMallocs[i]->getType()),
"isnull");
- if (!RunningOr)
- RunningOr = Cond; // First seteq
- else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
@@ -1355,7 +1338,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
- BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
OrigBB->getParent());
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
@@ -1370,9 +1354,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
- BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
- BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
OrigBB->getParent());
Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
Cmp, NullPtrBlock);
@@ -1406,8 +1390,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
@@ -1438,7 +1421,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
- PHIsToRewrite, Context);
+ PHIsToRewrite);
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
@@ -1477,8 +1460,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Module::global_iterator &GVI,
- TargetData *TD,
- LLVMContext &Context) {
+ TargetData *TD) {
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,15 +1490,14 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
+ if (Value *NElems = getMallocArraySize(CI, TD, true)) {
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
- Context, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
return true;
}
@@ -1540,7 +1521,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (const ArrayType *AT =
dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1551,12 +1532,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
CI = dyn_cast<BitCastInst>(Malloc) ?
- extractMallocCallFromBitCast(Malloc):
- cast<CallInst>(Malloc);
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
- Context, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
}
@@ -1569,7 +1548,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
Module::global_iterator &GVI,
- TargetData *TD, LLVMContext &Context) {
+ TargetData *TD) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1585,12 +1564,12 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
- GVI, TD, Context))
+ GVI, TD))
return true;
}
}
@@ -1602,8 +1581,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// two values ever stored into GV are its initializer and OtherVal. See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used. This exposes the values to other scalar optimizations.
-static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
- LLVMContext &Context) {
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
@@ -1611,7 +1589,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
- if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPoint() ||
isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
return false;
@@ -1624,15 +1603,16 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
- GlobalVariable *NewGV = new GlobalVariable(Context,
- Type::getInt1Ty(Context), false,
- GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
- assert(InitVal->getType() != Type::getInt1Ty(Context) &&
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
// If initialized to zero and storing one into the global, we can use a cast
@@ -1649,7 +1629,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal)
- StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
@@ -1708,24 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
#if 0
- cerr << "Global: " << *GV;
- cerr << " isLoaded = " << GS.isLoaded << "\n";
- cerr << " StoredType = ";
+ DEBUG(errs() << "Global: " << *GV);
+ DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n");
+ DEBUG(errs() << " StoredType = ");
switch (GS.StoredType) {
- case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
- case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
- case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
- case GlobalStatus::isStored: cerr << "stored\n"; break;
+ case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break;
+ case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n");
+ break;
+ case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break;
+ case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break;
}
if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n";
+ DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- cerr << " AccessingFunction = " << GS.AccessingFunction->getName()
- << "\n";
- cerr << " HasMultipleAccessingFunctions = "
- << GS.HasMultipleAccessingFunctions << "\n";
- cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n";
- cerr << "\n";
+ DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName()
+ << "\n");
+ DEBUG(errs() << " HasMultipleAccessingFunctions = "
+ << GS.HasMultipleAccessingFunctions << "\n");
+ DEBUG(errs() << " HasNonInstructionUser = "
+ << GS.HasNonInstructionUser<<"\n");
+ DEBUG(errs() << "\n");
#endif
// If this is a first class global and has only one accessing function
@@ -1764,8 +1747,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1780,7 +1762,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1794,8 +1776,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
- GV->getContext())) {
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
@@ -1810,8 +1791,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
if (GV->use_empty()) {
DEBUG(errs() << " *** Substituting initializer allowed us to "
@@ -1828,14 +1808,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>(),
- GV->getContext()))
+ getAnalysisIfAvailable<TargetData>()))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
}
@@ -1987,11 +1966,10 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors,
- LLVMContext &Context) {
+ const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
- CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
+ CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
// Create the new init list.
@@ -2000,12 +1978,14 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
if (Ctors[i]) {
CSVals[1] = Ctors[i];
} else {
- const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
const PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 2147483647);
}
- CAList.push_back(ConstantStruct::get(Context, CSVals, false));
+ CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
// Create the array initializer.
@@ -2021,8 +2001,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
// Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),
- GCL->isConstant(),
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
@@ -2056,7 +2035,7 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
/// enough for us to understand. In particular, if it is a cast of something,
/// we punt. We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
@@ -2096,8 +2075,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
- ConstantExpr *Addr, unsigned OpNo,
- LLVMContext &Context) {
+ ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
if (OpNo == Addr->getNumOperands()) {
assert(Val->getType() == Init->getType() && "Type mismatch!");
@@ -2126,10 +2104,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
- Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
// Return the modified struct.
- return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
+ return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
+ STy->isPacked());
} else {
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
const ArrayType *ATy = cast<ArrayType>(Init->getType());
@@ -2152,15 +2131,14 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
assert(CI->getZExtValue() < ATy->getNumElements());
Elts[CI->getZExtValue()] =
- EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
return ConstantArray::get(ATy, Elts);
}
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
-static void CommitValueTo(Constant *Val, Constant *Addr,
- LLVMContext &Context) {
+static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
assert(GV->hasInitializer());
GV->setInitializer(Val);
@@ -2171,7 +2149,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
Constant *Init = GV->getInitializer();
- Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
+ Init = EvaluateStoreInto(Init, Val, CE, 2);
GV->setInitializer(Init);
}
@@ -2179,8 +2157,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
/// P after the stores reflected by 'memory' have been performed. If we can't
/// decide, return null.
static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory,
- LLVMContext &Context) {
+ const DenseMap<Constant*, Constant*> &Memory) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
@@ -2218,8 +2195,6 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
- LLVMContext &Context = F->getContext();
-
CallStack.push_back(F);
/// Values - As we compute SSA register values, we store their contents here.
@@ -2246,7 +2221,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
- if (!isSimpleEnoughPointerToCommit(Ptr, Context))
+ if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
Constant *Val = getVal(Values, SI->getOperand(0));
@@ -2280,12 +2255,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (LI->isVolatile()) return false; // no volatile accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory, Context);
+ MutatedMemory);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
const Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
@@ -2423,7 +2398,7 @@ static bool EvaluateStaticConstructor(Function *F) {
<< " stores.\n");
for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
E = MutatedMemory.end(); I != E; ++I)
- CommitValueTo(I->second, I->first, F->getContext());
+ CommitValueTo(I->second, I->first);
}
// At this point, we are done interpreting. If we created any 'alloca'
@@ -2480,7 +2455,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (!MadeChange) return false;
- GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
+ GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ea47366..6918fe8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,10 +19,11 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,6 +33,7 @@
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
@@ -336,23 +338,38 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi];
+ Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
- // We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(errs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
- // If the policy determines that we should inline this function,
- // try to do so.
- if (!shouldInline(CS))
- continue;
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
- Function *Caller = CS.getCaller();
- // Attempt to inline the function...
- if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
- continue;
+ // Attempt to inline the function...
+ if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+ continue;
+ ++NumInlined;
+ }
- // If we inlined the last possible call site to the function, delete the
- // function body now.
- if (Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
@@ -391,7 +408,6 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
}
--CSi;
- ++NumInlined;
Changed = true;
LocalChange = true;
}
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index fd69aeb..cb81330 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -75,6 +75,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (L->getParentLoop())
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
DominatorTree &DT = getAnalysis<DominatorTree>();
bool Changed = false;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 4f6369e..0b5e007 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -202,53 +202,35 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
// llvm.dbg.region.end calls, and any globals they point to if now dead.
static bool StripDebugInfo(Module &M) {
+ bool Changed = false;
+
// Remove all of the calls to the debugger intrinsics, and remove them from
// the module.
- Function *FuncStart = M.getFunction("llvm.dbg.func.start");
- Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
- Function *RegionStart = M.getFunction("llvm.dbg.region.start");
- Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
- Function *Declare = M.getFunction("llvm.dbg.declare");
-
- if (FuncStart) {
- while (!FuncStart->use_empty()) {
- CallInst *CI = cast<CallInst>(FuncStart->use_back());
- CI->eraseFromParent();
- }
- FuncStart->eraseFromParent();
- }
- if (StopPoint) {
- while (!StopPoint->use_empty()) {
- CallInst *CI = cast<CallInst>(StopPoint->use_back());
- CI->eraseFromParent();
- }
- StopPoint->eraseFromParent();
- }
- if (RegionStart) {
- while (!RegionStart->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionStart->use_back());
- CI->eraseFromParent();
- }
- RegionStart->eraseFromParent();
- }
- if (RegionEnd) {
- while (!RegionEnd->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- CI->eraseFromParent();
- }
- RegionEnd->eraseFromParent();
- }
- if (Declare) {
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
CI->eraseFromParent();
}
Declare->eraseFromParent();
+ Changed = true;
}
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (NMD)
+ if (NMD) {
+ Changed = true;
NMD->eraseFromParent();
+ }
+ MetadataContext &TheMetadata = M.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ if (!MDDbgKind)
+ return Changed;
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI)
+ TheMetadata.removeMD(MDDbgKind, BI);
return true;
}
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index c8541d7..e58fa63 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -412,7 +412,9 @@ class ABCD : public FunctionPass {
/// If PN_op1 and PN_o2 are different from NULL, create a constraint
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value);
+ void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
+ APInt value);
/// Returns the sigma representing the Instruction I in BasicBlock BB.
/// Returns NULL in case there is no sigma for this Instruction in this
@@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
APInt Zero = APInt::getNullValue(width);
CmpInst::Predicate Pred = ICI->getPredicate();
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
+ ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
switch (Pred) {
case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
break;
case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
break;
case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
break;
case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
break;
default:
@@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
/// b->a and c->a with weight 0 in the lower bound graph, and the edges
/// a->b and a->c with weight 0 in the upper bound graph.
void ABCD::createConstraintPHINode(PHINode *PN) {
+ // FIXME: We really want to disallow sigma nodes, but I don't know the best
+ // way to detect the other than this.
+ if (PN->getNumOperands() == 2) return;
+
int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_t);
}
if (*SIG_op_f) {
int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_f);
}
}
@@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
APInt value) {
if (SIG_op1 && SIG_op2) {
inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
+ } else if (SIG_op1 && V_op2) {
+ inequality_graph.addEdge(V_op2, SIG_op1, value, true);
+ inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
+ } else if (SIG_op2 && V_op1) {
+ inequality_graph.addEdge(SIG_op2, V_op1, value, true);
+ inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
}
}
@@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
/// Prints the body of the dot file
void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin =
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin =
graph.begin(), end = graph.end();
for (; begin != end ; ++begin) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index e048518..5a92399 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
- CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
deleted file mode 100644
index 8a6c556..0000000
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass propagates information about conditional expressions through the
-// program, allowing it to eliminate conditional branches in some cases.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "condprop"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
-STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
-
-namespace {
- struct CondProp : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CondProp() : FunctionPass(&ID) {}
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(BreakCriticalEdgesID);
- //AU.addRequired<DominanceFrontier>();
- }
-
- private:
- bool MadeChange;
- SmallVector<BasicBlock *, 4> DeadBlocks;
- void SimplifyBlock(BasicBlock *BB);
- void SimplifyPredecessors(BranchInst *BI);
- void SimplifyPredecessors(SwitchInst *SI);
- void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
- bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI);
- };
-}
-
-char CondProp::ID = 0;
-static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
-
-FunctionPass *llvm::createCondPropagationPass() {
- return new CondProp();
-}
-
-bool CondProp::runOnFunction(Function &F) {
- bool EverMadeChange = false;
- DeadBlocks.clear();
-
- // While we are simplifying blocks, keep iterating.
- do {
- MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E;)
- SimplifyBlock(BB++);
- EverMadeChange = EverMadeChange || MadeChange;
- } while (MadeChange);
-
- if (EverMadeChange) {
- while (!DeadBlocks.empty()) {
- BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back();
- DeleteDeadBlock(BB);
- }
- }
- return EverMadeChange;
-}
-
-void CondProp::SimplifyBlock(BasicBlock *BB) {
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // If this is a conditional branch based on a phi node that is defined in
- // this block, see if we can simplify predecessors of this block.
- if (BI->isConditional() && isa<PHINode>(BI->getCondition()) &&
- cast<PHINode>(BI->getCondition())->getParent() == BB)
- SimplifyPredecessors(BI);
-
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isa<PHINode>(SI->getCondition()) &&
- cast<PHINode>(SI->getCondition())->getParent() == BB)
- SimplifyPredecessors(SI);
- }
-
- // If possible, simplify the terminator of this block.
- if (ConstantFoldTerminator(BB))
- MadeChange = true;
-
- // If this block ends with an unconditional branch and the only successor has
- // only this block as a predecessor, merge the two blocks together.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() &&
- BB != BI->getSuccessor(0)) {
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate
- // them.
- FoldSingleEntryPHINodes(Succ);
-
- // Remove BI.
- BI->eraseFromParent();
-
- // Move over all of the instructions.
- BB->getInstList().splice(BB->end(), Succ->getInstList());
-
- // Any phi nodes that had entries for Succ now have entries from BB.
- Succ->replaceAllUsesWith(BB);
-
- // Succ is now dead, but we cannot delete it without potentially
- // invalidating iterators elsewhere. Just insert an unreachable
- // instruction in it and delete this block later on.
- new UnreachableInst(BB->getContext(), Succ);
- DeadBlocks.push_back(Succ);
- MadeChange = true;
- }
-}
-
-// SimplifyPredecessors(branches) - We know that BI is a conditional branch
-// based on a PHI node defined in this block. If the phi node contains constant
-// operands, then the blocks corresponding to those operands can be modified to
-// jump directly to the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(BranchInst *BI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and dbg
- // intrinsics in the block.
- PHINode *PN = cast<PHINode>(BI->getCondition());
-
- if (PN->getNumIncomingValues() == 1) {
- // Eliminate single-entry PHI nodes.
- FoldSingleEntryPHINodes(PN->getParent());
- return;
- }
-
-
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = BI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != BI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
- Value *InVal = PN->getIncomingValue(i-1);
- if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
- continue;
-
- ++NumBrThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (BI->getCondition() != PN) return;
- }
-}
-
-// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node
-// defined in this block. If the phi node contains constant operands, then the
-// blocks corresponding to those operands can be modified to jump directly to
-// the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(SwitchInst *SI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and
- // dbg intrinsics in the block.
- PHINode *PN = cast<PHINode>(SI->getCondition());
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = SI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != SI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
- BasicBlock *PredBB = PN->getIncomingBlock(i-1);
- if (isa<BranchInst>(PredBB->getTerminator())) {
- // If we have a constant, forward the edge from its current to its
- // ultimate destination.
- unsigned DestCase = SI->findCaseValue(CI);
- RevectorBlockTo(PredBB, SI->getSuccessor(DestCase));
- ++NumSwThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (SI->getCondition() != PN) return;
- }
- }
-}
-
-
-// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to
-// the ToBB block, which is one of the successors of its current successor.
-void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) {
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- assert(FromBr->isUnconditional() && "FromBB should end with uncond br!");
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
-  // If ToBB has a single-operand PHI node, then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
-
- MadeChange = true;
-}
-
-bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- if (!FromBr->isUnconditional())
- return false;
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // If the condition is a constant, simply revector the unconditional branch at
- // the end of FromBB to one of the successors of its current successor.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) {
- BasicBlock *ToBB = BI->getSuccessor(CB->isZero());
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
-    // If ToBB has a single-operand PHI node, then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
- } else {
- BasicBlock *Succ0 = BI->getSuccessor(0);
-    // Do not perform the transform if the new destination has PHI nodes; the
-    // transform would add new predecessors to the PHIs.
- if (isa<PHINode>(Succ0->begin()))
- return false;
-
- BasicBlock *Succ1 = BI->getSuccessor(1);
- if (isa<PHINode>(Succ1->begin()))
- return false;
-
- // Insert the new conditional branch.
- BranchInst::Create(Succ0, Succ1, Cond, FromBr);
-
- FoldSingleEntryPHINodes(Succ0);
- FoldSingleEntryPHINodes(Succ1);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Delete the old branch.
- FromBr->eraseFromParent();
- }
-
- MadeChange = true;
- return true;
-}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 4fee327..ea20813 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 90436f4..b0988b5 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the
-/// stored size of V2. This returns false if we don't know.
+/// doesClobberMemory - Does this instruction clobber (write without reading)
+/// some memory?
+static bool doesClobberMemory(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+ }
+ }
+ return false;
+}
+
+/// isElidable - If the value of this instruction and the memory it writes to
+/// are unused, may we delete this instruction?
+static bool isElidable(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() != Intrinsic::lifetime_end;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ return true;
+}
+
+/// getPointerOperand - Return the pointer that is being clobbered.
+static Value *getPointerOperand(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getOperand(1);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getOperand(1);
+ case Intrinsic::lifetime_end:
+ return II->getOperand(2);
+ }
+}
+
+/// getStoreSize - Return the length in bytes of the write by the clobbering
+/// instruction. If variable or unknown, returns -1.
+static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!TD) return -1u;
+ return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+
+ Value *Len;
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Len = MI->getLength();
+ } else {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = II->getOperand(1);
+ break;
+ }
+ }
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
+ if (!LenCI->isAllOnesValue())
+ return LenCI->getZExtValue();
+ return -1u;
+}
+
+/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
+/// greater than or equal to the size of the store in I2.  This returns false
+/// if we don't know.
///
-static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) {
- const Type *V1Ty = V1->getType(), *V2Ty = V2->getType();
+static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
+ const TargetData *TD) {
+ const Type *I1Ty = getPointerOperand(I1)->getType();
+ const Type *I2Ty = getPointerOperand(I2)->getType();
// Exactly the same type, must have exactly the same size.
- if (V1Ty == V2Ty) return true;
+ if (I1Ty == I2Ty) return true;
- // If we don't have target data, we don't know.
- if (TD == 0) return false;
+ int I1Size = getStoreSize(I1, TD);
+ int I2Size = getStoreSize(I2, TD);
- return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty);
+ return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
}
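// To make the new width test concrete, here is a standalone C++ sketch (not
// part of the patch) using the same -1u "unknown" sentinel that getStoreSize
// returns above.
#include <cassert>
static const unsigned Unknown = -1u; // getStoreSize's sentinel
// The later store kills the earlier one only when both sizes are known and
// the later store covers at least as many bytes.
static bool atLeastAsWide(unsigned LaterSize, unsigned EarlierSize) {
  return LaterSize != Unknown && EarlierSize != Unknown &&
         LaterSize >= EarlierSize;
}
int main() {
  assert(atLeastAsWide(8, 4));        // i64 store over an i32 store: dead
  assert(!atLeastAsWide(4, 8));       // i32 over i64: high bytes survive
  assert(!atLeastAsWide(Unknown, 4)); // variable-length memset: keep it
  return 0;
}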
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// If we find a store or a free, get its memory dependence.
- if (!isa<StoreInst>(Inst) && !isFreeCall(Inst))
+ if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
continue;
- // Don't molest volatile stores or do queries that will return "clobber".
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- if (SI->isVolatile())
- continue;
-
MemDepResult InstDep = MD.getDependency(Inst);
// Ignore non-local stores.
@@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
continue;
}
- StoreInst *SI = cast<StoreInst>(Inst);
-
// If not a definite must-alias dependency, ignore it.
if (!InstDep.isDef())
continue;
// If this is a store-store dependence, then the previous store is dead so
// long as this store is at least as big as it.
- if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
- if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){
+ if (doesClobberMemory(InstDep.getInst())) {
+ Instruction *DepStore = InstDep.getInst();
+ if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
+ isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
NumFastStores++;
@@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
continue;
}
+ }
+
+ if (!isElidable(Inst))
+ continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
- if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(SI);
-
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- NumFastStores++;
- MadeChange = true;
- continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
}
}
// If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
// Delete the store and now-dead instructions that feed it.
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(Inst);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
- if (!Dependency || Dependency->isVolatile())
+ Instruction *Dependency = Dep.getInst();
+ if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
return false;
- Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
+ Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
@@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store whose pointer is dead.
- if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
- if (!S->isVolatile()) {
+ if (doesClobberMemory(BBI)) {
+ if (isElidable(BBI)) {
// See through pointer-to-pointer bitcasts
- Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject();
+ Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
// Alloca'd pointers or byval arguments (which are functionally like
// alloca's) are valid candidates for removal.
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
+ Instruction *Dead = BBI;
BBI++;
- DeleteDeadInstruction(S, &deadPointers);
+ DeleteDeadInstruction(Dead, &deadPointers);
NumFastStores++;
MadeChange = true;
+ continue;
}
}
- continue;
- }
-
- // We can also remove memcpy's to local variables at the end of a function.
- if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) {
- Value *dest = M->getDest()->getUnderlyingObject();
-
- if (deadPointers.count(dest)) {
- BBI++;
- DeleteDeadInstruction(M, &deadPointers);
- NumFastOther++;
- MadeChange = true;
+ // Because a memcpy or memmove is also a load, we can't skip it if we
+ // didn't remove it.
+ if (!isa<MemTransferInst>(BBI))
continue;
- }
-
- // Because a memcpy is also a load, we can't skip it if we didn't remove
- // it.
}
Value* killPointer = 0;
@@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
killPointer = L->getPointerOperand();
} else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
killPointer = V->getOperand(0);
- } else if (isa<MemCpyInst>(BBI) &&
- isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
- killPointer = cast<MemCpyInst>(BBI)->getSource();
+ } else if (isa<MemTransferInst>(BBI) &&
+ isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
+ killPointer = cast<MemTransferInst>(BBI)->getSource();
killPointerSize = cast<ConstantInt>(
- cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
+ cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
} else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
deadPointers.erase(A);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0e3f750..a8f39c1 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
valueNumbering[C] = e;
return e;
}
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
MemDepResult local_dep = MD->getDependency(C);
@@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
- DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
}
@@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -669,10 +674,12 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { }
+ explicit GVN(bool nopre = false, bool noloads = false)
+ : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
private:
bool NoPRE;
+ bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -682,7 +689,8 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
@@ -711,7 +719,9 @@ namespace {
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); }
+FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
+ return new GVN(NoPRE, NoLoads);
+}
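// A hedged client-side sketch (not part of the patch; assumes the LLVM
// 2.6-era PassManager headers): the new NoLoads argument lets a pipeline
// schedule GVN without load elimination, which also avoids the
// MemoryDependenceAnalysis requirement per the getAnalysisUsage change above.
#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static void addScalarOpts(PassManager &PM) {
  // Keep PRE, but skip load elimination; with NoLoads set, processLoad
  // bails out early and MemoryDependenceAnalysis is never requested.
  PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
}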
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
@@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (!MD)
+ return false;
+
if (L->isVolatile())
return false;
@@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (isa<PointerType>(constVal->getType()))
+ if (MD && isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (isa<PointerType>(repl->getType()))
+ if (MD && isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I,
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
DEBUG(errs() << "GVN removed: " << **I << '\n');
- MD->removeInstruction(*I);
+ if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
@@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (isa<PointerType>(Phi->getType()))
+ if (MD && isa<PointerType>(Phi->getType()))
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
- MD->removeInstruction(CurInst);
+ if (MD) MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
Changed = true;
@@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
const ValueNumberScope *VNS = I->second;
while (VNS) {
- for (DenseMap<uint32_t, Value*>::iterator
+ for (DenseMap<uint32_t, Value*>::const_iterator
II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
assert(II->second != Inst && "Inst still in value numbering scope!");
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b0bc70c..2912421 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7e75cfb..1c48366 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -283,6 +284,8 @@ namespace {
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
@@ -380,10 +383,6 @@ namespace {
/// commutative operators.
bool SimplifyCommutative(BinaryOperator &I);
- /// SimplifyCompare - This reorders the operands of a CmpInst to get them in
- /// most-complex to least-complex order.
- bool SimplifyCompare(CmpInst &I);
-
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
@@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+static bool ShouldChangeType(const Type *From, const Type *To,
+ const TargetData *TD) {
+ assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If this is a legal integer from type, and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
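// As a standalone model of this heuristic (assuming a target whose legal
// integer widths are 8, 16, 32, and 64 -- the usual answer from
// TD->isLegalInteger), the decision reduces to pure width arithmetic:
#include <cassert>
static bool isLegalWidth(unsigned W) {
  return W == 8 || W == 16 || W == 32 || W == 64;
}
// Same decision procedure as ShouldChangeType, with bit widths for types.
static bool shouldChangeWidth(unsigned FromW, unsigned ToW) {
  bool FromLegal = isLegalWidth(FromW), ToLegal = isLegalWidth(ToW);
  if (FromLegal && !ToLegal)
    return false;               // never turn a legal type illegal
  if (!FromLegal && !ToLegal && ToW > FromW)
    return false;               // don't grow one odd type into a bigger one
  return true;
}
int main() {
  assert(shouldChangeWidth(160, 64));  // shrink illegal to legal: allowed
  assert(!shouldChangeWidth(64, 160)); // legal to illegal: refused
  assert(!shouldChangeWidth(93, 160)); // illegal growing larger: refused
  assert(shouldChangeWidth(32, 16));   // legal to legal: allowed
  return 0;
}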
+
/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
@@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
return Changed;
}
-/// SimplifyCompare - For a CmpInst this function just orders the operands
-/// so that they are listed from right (least complex) to left (most complex).
-/// This puts constants before unary operators before binary operators.
-bool InstCombiner::SimplifyCompare(CmpInst &I) {
- if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1)))
- return false;
- I.swapOperands();
- // Compare instructions are not associative so there's nothing else we can do.
- return true;
-}
-
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
@@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X & undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // and X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op1);
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X & <-1,-1> -> X
- return ReplaceInstUsesWith(I, I.getOperand(0));
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
- }
- }
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
- Value *Op0NotVal = dyn_castNotVal(Op0);
- Value *Op1NotVal = dyn_castNotVal(Op1);
-
- if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
-
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
- if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) // (A | ?) & A --> A
- return ReplaceInstUsesWith(I, Op1);
-
- // (A|B) & ~(A&B) -> A^B
- if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
- if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0 || B == Op0) // A & (A | ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X | undef -> -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // or X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op0);
-
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
- }
- }
- // or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
@@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A = 0, *B = 0;
ConstantInt *C1 = 0, *C2 = 0;
- if (match(Op0, m_And(m_Value(A), m_Value(B))))
- if (A == Op1 || B == Op1) // (A & ?) | A --> A
- return ReplaceInstUsesWith(I, Op1);
- if (match(Op1, m_And(m_Value(A), m_Value(B))))
- if (A == Op0 || B == Op0) // A | (A & ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
@@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
- if ((A = dyn_castNotVal(Op0))) { // ~A | Op1
- if (A == Op1) // ~A | A == -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
- } else {
- A = 0;
- }
- // Note, A is still live here!
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B
- if (Op0 == B)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
- }
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
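// Both this fold and its visitAnd counterpart above rest on De Morgan's
// laws; a standalone check over machine integers verifies them exhaustively
// on a small range:
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t a = 0; a != 64; ++a)
    for (uint32_t b = 0; b != 64; ++b) {
      assert((~a & ~b) == ~(a | b)); // the 'and' form folded in visitAnd
      assert((~a | ~b) == ~(a & b)); // the 'or' form folded here
    }
  return 0;
}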
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
@@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
}
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
- bool Changed = SimplifyCompare(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+  /// which come before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
- // Fold trivial predicates.
- if (I.getPredicate() == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
- if (I.getPredicate() == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Simplify 'fcmp pred X, X'
if (Op0 == Op1) {
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown predicate!");
- case FCmpInst::FCMP_UEQ: // True if unordered or equal
- case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
- case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
- case FCmpInst::FCMP_OGT: // True if ordered and greater than
- case FCmpInst::FCMP_OLT: // True if ordered and less than
- case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
-
case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
case FCmpInst::FCMP_ULT: // True if unordered or less than
case FCmpInst::FCMP_UGT: // True if unordered or greater than
@@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
- if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- // If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- assert(FCmpInst::isUnordered(I.getPredicate()) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- }
- }
-
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
@@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
- bool Changed = SimplifyCompare(I);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+  /// which come before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- const Type *Ty = Op0->getType();
-
- // icmp X, X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
- I.isTrueWhenEqual()));
-
- if (isa<UndefValue>(Op1)) // X icmp undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
- // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
- // addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
- isa<ConstantPointerNull>(Op0)) &&
- (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
- isa<ConstantPointerNull>(Op1)))
- return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::getInt1Ty(*Context)) {
@@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below.
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULE:
- if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
AddOne(CI));
case ICmpInst::ICMP_SLE:
- if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
AddOne(CI));
case ICmpInst::ICMP_UGE:
- if (CI->isMinValue(false)) // A >=u MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
SubOne(CI));
case ICmpInst::ICMP_SGE:
- if (CI->isMinValue(true)) // A >=s MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
SubOne(CI));
}
@@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getIntegerCast(C, Ty,
- isSigned /*Sext or ZExt*/);
+ return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
return I->getOperand(0);
// Otherwise, must be the same type of cast, so just reinsert a new one.
- Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty);
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty);
break;
case Instruction::Select: {
Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
@@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
// If we are casting a PHI then fold the cast into the PHI
- if (isa<PHINode>(Src))
- if (Instruction *NV = FoldOpIntoPhi(CI))
- return NV;
+ if (isa<PHINode>(Src)) {
+ // We don't do this if this would create a PHI node with an illegal type if
+ // it is currently legal.
+ if (!isa<IntegerType>(Src->getType()) ||
+ !isa<IntegerType>(CI.getType()) ||
+ ShouldChangeType(CI.getType(), Src->getType(), TD))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
return 0;
}
@@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return commonCastTransforms(CI);
}
-/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy
-/// type like i42. We don't want to introduce operations on random non-legal
-/// integer types where they don't already exist in the code. In the future,
-/// we should consider making this based off target-data, so that 32-bit targets
-/// won't get i64 operations etc.
-static bool isSafeIntegerType(const Type *Ty) {
- switch (Ty->getPrimitiveSizeInBits()) {
- case 8:
- case 16:
- case 32:
- case 64:
- return true;
- default:
- return false;
- }
-}
-
/// commonIntCastTransforms - This function implements the common transforms
/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
@@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isSafeIntegerType(DestTy->getScalarType()) ||
- !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ if ((isa<VectorType>(DestTy) ||
+ ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluting in a different type always
@@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
break;
case Instruction::ZExt: {
DoXForm = NumCastsRemoved >= 1;
+
if (!DoXForm && 0) {
// If it's unnecessary to issue an AND to clear the high bits, it's
// always profitable to do this xform.
@@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return BinaryOperator::CreateLShr(V1, V2);
}
}
-
+
return 0;
}
@@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
}
-// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-// operator and they all are only used by the PHI, PHI together their
-// inputs, and do the operation once, to the result of the PHI.
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy, TD))
+ return 0;
+ }
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
@@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
}
+namespace {
+struct PHIUsageRecord {
+  unsigned PHIId;  // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
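// This ordering feeds array_pod_sort later on; a standalone model (plain
// std::sort over a hypothetical UseRec type, not part of the patch) shows how
// sorting by PHI id, then shift, then extracted width brings duplicate
// extracts together:
#include <algorithm>
#include <cassert>
#include <vector>
struct UseRec { unsigned PHIId, Shift, Width; }; // Width models the trunc type
static bool lessThan(const UseRec &L, const UseRec &R) {
  if (L.PHIId != R.PHIId) return L.PHIId < R.PHIId;
  if (L.Shift != R.Shift) return L.Shift < R.Shift;
  return L.Width < R.Width;
}
int main() {
  std::vector<UseRec> V = {{1, 0, 32}, {0, 32, 16}, {0, 0, 8}};
  std::sort(V.begin(), V.end(), lessThan);
  // Sorted by PHI, then offset, then width: identical extracts are now
  // adjacent, so reusing a previously lowered PHI is a simple map lookup.
  assert(V[0].PHIId == 0 && V[0].Shift == 0);
  assert(V[1].PHIId == 0 && V[1].Shift == 32);
  assert(V[2].PHIId == 1);
  return 0;
}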
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ static bool isPod() { return true; }
+ };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+  // nodes which are extracted from.  PHIsInspected is a set we use to avoid
+  // revisiting PHIs; PHIsToSlice is an ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+  // If we have no users, they must all be self-uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+      // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+        } else if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+          // If the incoming value was a PHI, and if it was one of the PHIs we
+          // already rewrote, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
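// Each per-predecessor extract emitted above is just a logical shift right
// followed by a truncate; a standalone scalar model over uint64_t:
#include <cassert>
#include <cstdint>
// Extract 'width' bits of 'v' starting at bit 'offset' -- the scalar
// analogue of the CreateLShr + CreateTrunc pair built per predecessor.
static uint64_t extractPiece(uint64_t v, unsigned offset, unsigned width) {
  uint64_t shifted = v >> offset;                           // CreateLShr
  uint64_t mask = width >= 64 ? ~0ULL : ((1ULL << width) - 1);
  return shifted & mask;                                    // CreateTrunc
}
int main() {
  uint64_t v = 0x1122334455667788ULL;
  assert(extractPiece(v, 0, 32) == 0x55667788ULL);  // trunc to i32
  assert(extractPiece(v, 32, 32) == 0x11223344ULL); // lshr 32 + trunc to i32
  assert(extractPiece(v, 48, 16) == 0x1122ULL);     // lshr 48 + trunc to i16
  return 0;
}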
+
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (isa<IntegerType>(PN.getType()) && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
return 0;
}
@@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
exti, exte);
}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+    // We're extracting from an intrinsic; see if we're its only user, which
+    // allows us to simplify multiple-result intrinsics to simpler things that
+    // just produce one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
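// A standalone model of the fold (a std::pair stands in for the intrinsic's
// {iN, i1} aggregate; not part of the patch): when only element 0 of
// uadd.with.overflow is extracted, the whole computation degrades to an
// ordinary add, and the two must agree:
#include <cassert>
#include <cstdint>
#include <utility>
static std::pair<uint32_t, bool> uaddWithOverflow(uint32_t a, uint32_t b) {
  uint32_t r = a + b;              // wraps, like the intrinsic's first result
  return std::make_pair(r, r < a); // carry-out is the overflow bit
}
int main() {
  uint32_t a = 0xFFFFFFF0u, b = 0x20u;
  // Only the first element is used, so the intrinsic can be replaced by a
  // plain add -- the fold is sound exactly because these always match:
  assert(uaddWithOverflow(a, b).first == a + b);
  return 0;
}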
// Can't simplify extracts from other values. Note that nested extracts are
 // already simplified implicitly by the above (extract ( extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
@@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<UndefValue>(RHS)) {
std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= 2*e)
- NewMask.push_back(2*e);
- else
- NewMask.push_back(LHSMask[Mask[i]]);
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= 2*e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
- // If the result mask is equal to the src shuffle or this shuffle mask, do
- // the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
+ } else {
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
+ NewMask[i]));
+ }
}
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
}
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
}
}
}
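// The mask composition itself is plain index arithmetic; a standalone sketch
// (2*e is the undef-lane sentinel, as above) also shows why the new
// LHSMask.size() == Mask.size() guard matters: without it, Mask entries could
// index LHSMask out of range.
#include <cassert>
#include <vector>
// NewMask[i] is either the undef sentinel or a lane looked up through LHSMask.
static std::vector<unsigned> compose(const std::vector<unsigned> &Mask,
                                     const std::vector<unsigned> &LHSMask) {
  unsigned e = Mask.size();
  std::vector<unsigned> NewMask;
  for (unsigned i = 0; i != e; ++i)
    NewMask.push_back(Mask[i] >= 2 * e ? 2 * e : LHSMask[Mask[i]]);
  return NewMask;
}
int main() {
  // shuffle(shuffle(x, undef, <2,3,0,1>), undef, <1,0,undef,2>)
  std::vector<unsigned> LHSMask = {2, 3, 0, 1};
  std::vector<unsigned> Mask = {1, 0, 8, 2}; // 8 == 2*e marks an undef lane
  assert((compose(Mask, LHSMask) == std::vector<unsigned>{3, 2, 8, 0}));
  return 0;
}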
@@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
if (!FoldedConstants.insert(CE))
continue;
- Constant *NewC =
- ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
if (NewC && NewC != CE) {
*i = NewC;
MadeIRChange = true;
@@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
+ TheBuilder(F.getContext(), TargetFolder(TD),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 10c9ec6..5864113 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,7 +16,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -40,6 +41,12 @@ Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -59,6 +66,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -69,20 +77,31 @@ namespace {
JumpThreading() : FunctionPass(&ID) {}
bool runOnFunction(Function &F);
- void FindLoopHeaders(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI)
+ AU.addRequired<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB);
-
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
+
+ typedef SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > PredValueInfo;
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
+
+
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessJumpOnPHI(PHINode *PN);
- bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
- bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
@@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
@@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) {
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
Changed = true;
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ if (BBI->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward in the rare case when the block isn't deleted.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ Changed = true;
+ else if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
}
}
AnotherIteration = Changed;
@@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
return Size;
}
-
-
/// FindLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
@@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-
-/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
-/// value for the PHI, factor them together so we get one block to thread for
-/// the whole group.
-/// This is important for things like "phi i1 [true, true, false, true, x]"
-/// where we only need to clone the block for the true blocks once.
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt in any of our
+/// predecessors. If so, return the list of known (value, predecessor) pairs
+/// in the result vector. If a value is known to be undef, it is returned as null.
+///
+/// This returns true if there were any known values.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
- SmallVector<BasicBlock*, 16> CommonPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Val)
- CommonPreds.push_back(PN->getIncomingBlock(i));
-
- if (CommonPreds.size() == 1)
- return CommonPreds[0];
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+  // If V is a ConstantInt or undef, then it is known in all predecessors.
+ if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
- DEBUG(errs() << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n");
- return SplitBlockPredecessors(PN->getParent(),
- &CommonPreds[0], CommonPreds.size(),
- ".thr_comm", this);
-}
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+    // FIXME: change this to use the richer 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+ }
+
+  // If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
+ Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ }
+ }
+ return !Result.empty();
+ }
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ // Scan for the sentinel.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ Result.push_back(LHSVals[i]);
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ Result.push_back(RHSVals[i]);
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ return true;
+ }
+ }
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (isa<UndefValue>(Res))
+ Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
+ Result.push_back(std::make_pair(CI, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isInteger() && // Not vector compare.
+ (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ }
+
+ return !Result.empty();
+ }
+ }
+ return false;
+}
+
+
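
// A sketch for intuition, not part of the LLVM sources: the boolean cases
// above follow ordinary three-valued logic. An Or with a known-true operand,
// an And with a known-false operand, or an Xor-with-one (logical not) of a
// known value pins the condition down for the predecessor supplying that
// value. A minimal standalone C++ model, where an empty optional stands for
// a value ComputeValueKnownInPredecessors could not determine:

#include <cassert>
#include <optional>

using Known = std::optional<bool>;

Known evalOr(Known A, Known B) {
  if ((A && *A) || (B && *B)) return true;     // X | true -> true
  if (A && B) return false;                    // false | false -> false
  return std::nullopt;
}

Known evalAnd(Known A, Known B) {
  if ((A && !*A) || (B && !*B)) return false;  // X & false -> false
  if (A && B) return true;                     // true & true -> true
  return std::nullopt;
}

Known evalNot(Known A) {                       // the 'xor X, 1' case
  if (A) return !*A;
  return std::nullopt;
}

int main() {
  assert(evalOr(std::nullopt, true) == Known(true));    // edge is threadable
  assert(evalAnd(false, std::nullopt) == Known(false)); // edge is threadable
  assert(!evalNot(std::nullopt).has_value());           // unknown stays unknown
}
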
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
@@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
SinglePred != BB) {
// If SinglePred was a loop header, BB becomes one.
@@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
-
- // See if this block ends with a branch or switch. If so, see if the
- // condition is a phi node. If so, and if an entry of the phi node is a
- // constant, we can thread the block.
+ }
+
+  // Look to see if the terminator is a branch or switch; if not, we can't
+  // thread it.
Value *Condition;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
// Can't thread an unconditional jump.
@@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
DEBUG(errs() << " In block '" << BB->getName()
@@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
@@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
+ }
+
// See if this is a phi node in the current block.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB)
return ProcessJumpOnPHI(PN);
- // If this is a conditional branch whose condition is and/or of a phi, try to
- // simplify it.
- if ((CondInst->getOpcode() == Instruction::And ||
- CondInst->getOpcode() == Instruction::Or) &&
- isa<BranchInst>(BB->getTerminator()) &&
- ProcessBranchOnLogical(CondInst, BB,
- CondInst->getOpcode() == Instruction::And))
- return true;
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (isa<PHINode>(CondCmp->getOperand(0))) {
- // If we have "br (phi != 42)" and the phi node has any constant values
- // as operands, we can thread through this block.
- //
- // If we have "br (cmp phi, x)" and the phi node contains x such that the
- // comparison uniquely identifies the branch target, we can thread
- // through this block.
-
- if (ProcessBranchOnCompare(CondCmp, BB))
- return true;
- }
-
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with the same value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
- return true;
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with a lexically identical value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
}
}
- }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
+
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
// "(X == 4)" thread through this block.
@@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// Next, figure out which successor we are threading to.
BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+ SmallVector<BasicBlock*, 2> Preds;
+ Preds.push_back(PredBB);
+
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ return ThreadEdge(BB, Preds, SuccBB);
}
/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
@@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
return true;
}
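
// For intuition, a source-level sketch (hand-written, not from this patch) of
// removing a partially redundant load: the load is copied onto the one edge
// where the value was unavailable, so the merge point sees it on every path.
// This is only sound when nothing can store to *p in between, which is what
// the availability scan above verifies.

int pre_before(bool flag, int *p) {
  int x = 0;
  if (flag)
    x = *p;        // load available on this edge only
  return x + *p;   // partially redundant load after the merge
}

int pre_after(bool flag, int *p) {
  int x = 0, avail;
  if (flag) {
    x = *p;
    avail = x;     // value was already loaded here
  } else {
    avail = *p;    // inserted load, akin to the ".pr" load created above
  }
  return x + avail;
}
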
-
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
-///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
- BasicBlock *BB = PN->getParent();
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+  // explicitly choose to ignore 'undef' destinations. We prefer to thread
+  // blocks with known and real destinations over threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
- // See if the phi node has any constant integer or undef values. If so, we
- // can determine where the corresponding predecessor will branch.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PredVal = PN->getIncomingValue(i);
-
- // Check to see if this input is a constant integer. If so, the direction
- // of the branch is predictable.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+  // Okay, now we know the most popular destination. If there is more than one
+ // destination, we need to determine one. This is arbitrary, but we need
+ // to make a deterministic decision. Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
- BasicBlock *SuccBB;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- SuccBB = BI->getSuccessor(CI->isZero());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
- }
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ MostPopularDest = TI->getSuccessor(i);
+ break;
}
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
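
// The popularity computation is easy to check in isolation. A standalone
// sketch (illustrative only; plain STL containers stand in for LLVM's
// DenseMap), including the deterministic tie-break against an ordered
// successor list:

#include <cassert>
#include <map>
#include <string>
#include <vector>

std::string mostPopularDest(const std::vector<std::string> &Dests,
                            const std::vector<std::string> &SuccOrder) {
  std::map<std::string, unsigned> Popularity;
  for (const std::string &D : Dests)
    ++Popularity[D];

  // Track the best count seen so far and every destination tied for it.
  unsigned Best = 0;
  std::vector<std::string> Tied;
  for (const auto &Entry : Popularity) {
    if (Entry.second < Best)
      continue;
    if (Entry.second > Best)
      Tied.clear();
    Best = Entry.second;
    Tied.push_back(Entry.first);
  }

  // Deterministic tie-break: first tied candidate in successor order.
  for (const std::string &S : SuccOrder)
    for (const std::string &T : Tied)
      if (S == T)
        return S;
  return Tied.front();
}

int main() {
  // "bb1" and "bb2" are tied 2-2; "bb1" wins because it appears first in
  // the successor list, matching FindMostPopularDest's behavior.
  std::vector<std::string> Dests = {"bb2", "bb1", "bb2", "bb1"};
  std::vector<std::string> Succs = {"bb1", "bb2", "bb3"};
  assert(mostPopularDest(Dests, Succs) == "bb1");
}
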
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ return false;
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(errs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ if (PredValues[i].first)
+ errs() << *PredValues[i].first;
+ else
+ errs() << "UNDEF";
+ errs() << " for pred '" << PredValues[i].second->getName()
+ << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
- // If the input is an undef, then it doesn't matter which way it will go.
- // Pick an arbitrary dest and thread the edge.
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
- BasicBlock *SuccBB =
- BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ ConstantInt *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (Val == 0) // Undef.
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(Val->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ DestBB = SI->getSuccessor(SI->findCaseValue(Val));
}
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (i == 0)
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
- // If the incoming values are all variables, we don't know the destination of
- // any predecessors. However, if any of the predecessor blocks end in an
- // unconditional branch, we can *duplicate* the jump into that block in order
- // to further encourage jump threading and to eliminate cases where we have
- // branch on a phi of an icmp (branch on icmp is much better).
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+ // to the most popular destination first. If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
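
// Putting the pieces together, the observable effect on straight-line source
// looks roughly like this (a hand-written illustration, not compiler output):
// a predecessor that fixes the condition jumps directly to the matching
// successor and skips the re-test at the merge point.

int thread_before(bool external) {
  bool cond;
  if (external)
    cond = true;        // this predecessor pins cond to a constant
  else
    cond = external;    // unknown on this path
  if (cond)             // condition re-tested at the merge point
    return 1;
  return 0;
}

int thread_after(bool external) {
  if (external)
    return 1;           // edge threaded straight to the 'true' successor
  bool cond = external; // only the unknown path keeps the test
  if (cond)
    return 1;
  return 0;
}
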
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the jump into that block in order to further encourage jump
+ // threading and to eliminate cases where we have branch on a phi of an icmp
+ // (branch on icmp is much better).
// We don't want to do this transformation for switches, because we don't
// really want to duplicate a switch.
@@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
}
-/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
-/// whose condition is an AND/OR where one side is PN. If PN has constant
-/// operands that permit us to evaluate the condition for some operand, thread
-/// through the block. For example with:
-/// br (and X, phi(Y, Z, false))
-/// the predecessor corresponding to the 'false' will always jump to the false
-/// destination of the branch.
-///
-bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
- bool isAnd) {
- // If this is a binary operator tree of the same AND/OR opcode, check the
- // LHS/RHS.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if ((isAnd && BO->getOpcode() == Instruction::And) ||
- (!isAnd && BO->getOpcode() == Instruction::Or)) {
- if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
- return true;
- if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
- return true;
- }
-
- // If this isn't a PHI node, we can't handle it.
- PHINode *PN = dyn_cast<PHINode>(V);
- if (!PN || PN->getParent() != BB) return false;
-
- // We can only do the simplification for phi nodes of 'false' with AND or
- // 'true' with OR. See if we have any entries in the phi for this.
- unsigned PredNo = ~0U;
- ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- !isAnd);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == PredCst) {
- PredNo = i;
- break;
- }
- }
-
- // If no match, bail out.
- if (PredNo == ~0U)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
-
- // Next, figure out which successor we are threading to. If this was an AND,
- // the constant must be FALSE, and we must be targeting the 'false' block.
- // If this is an OR, the constant must be TRUE, and we must be targeting the
- // 'true' block.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
-/// hand sides of the compare instruction, try to determine the result. If the
-/// result can not be determined, a null pointer is returned.
-static Constant *GetResultOfComparison(CmpInst::Predicate pred,
- Value *LHS, Value *RHS,
- LLVMContext &Context) {
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::getCompare(pred, CLHS, CRHS);
-
- if (LHS == RHS)
- if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
- return ICmpInst::isTrueWhenEqual(pred) ?
- ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
-
- return 0;
-}
-
-/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a value. If we can identify when the comparison is true between
-/// the phi inputs and the value, we can fold the compare for that edge and
-/// thread through it.
-bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
- PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Value *RHS = Cmp->getOperand(1);
-
- // If the phi isn't in the current block, an incoming edge to this block
- // doesn't control the destination.
- if (PN->getParent() != BB)
- return false;
-
- // We can do this simplification if any comparisons fold to true or false.
- // See if any do.
- Value *PredVal = 0;
- bool TrueDirection = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredVal = PN->getIncomingValue(i);
-
- Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
- RHS, Cmp->getContext());
- if (!Res) {
- PredVal = 0;
- continue;
- }
-
- // If this folded to a constant expr, we can't do anything.
- if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
- TrueDirection = ResC->getZExtValue();
- break;
- }
- // If this folded to undef, just go the false way.
- if (isa<UndefValue>(Res)) {
- TrueDirection = false;
- break;
- }
-
- // Otherwise, we can't fold this input.
- PredVal = 0;
- }
-
- // If no match, bail out.
- if (PredVal == 0)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
-
- // Next, get our successor.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
@@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
-/// ThreadEdge - We have decided that it is safe and profitable to thread an
-/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
-/// change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
@@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading from '" << PredBB->getName()
- << "' across loop header BB '" << BB->getName()
+ DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
@@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
return false;
}
+  // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(errs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// And finally, do it!
DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
@@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
PredTerm->setSuccessor(i, NewBB);
}
@@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
- Inst->replaceAllUsesWith(C);
- Inst->eraseFromParent();
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ if (BIHandle == 0)
+ BI = NewBB->begin();
continue;
}
@@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 756fbf3..104c873 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get the preheader block to move instructions into...
Preheader = L->getLoopPreheader();
- assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!");
// Loop over the body of this loop, looking for calls, invokes, and stores.
// Because subloops have already been incorporated into AST, we skip blocks in
@@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
//
- SinkRegion(DT->getNode(L->getHeader()));
- HoistRegion(DT->getNode(L->getHeader()));
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can...
- if (!DisablePromotion)
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits())
PromoteValuesInLoop();
// Clear out the loop's state information for the next iteration
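
// The guards above let LICM degrade gracefully instead of asserting when
// LoopSimplify form is unavailable: sinking needs dedicated exits, hoisting
// needs a preheader to hoist into, and promotion needs both. The hoisting
// transformation itself, in source terms (illustration only):

void scale_before(float *a, int n, float x, float y) {
  for (int i = 0; i < n; ++i)
    a[i] *= x * y;            // x * y is loop invariant
}

void scale_after(float *a, int n, float x, float y) {
  float xy = x * y;           // hoisted once; this needs somewhere to
  for (int i = 0; i < n; ++i) // live, hence the preheader requirement
    a[i] *= xy;
}
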
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 866d8b4..48817ab 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (!preheader)
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
// We can't remove loops that contain subloops. If the subloops were dead,
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
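
// For reference (not from the patch), the shape of a loop this pass can
// delete: no side effects, nothing live-out, and a computable trip count.

int deletion_candidate(int n) {
  int acc = 0;
  for (int i = 0; i < 100; ++i)
    acc += i;   // acc is never read after the loop
  return n;     // so the whole loop is removable
}
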
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 920d85c..8b6a233 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
L = IncomingLoop;
LPM = &LPM_Ref;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
// FIXME - Nested loops make dominator info updates tricky.
if (!L->getSubLoops().empty())
return false;
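
// As a reminder of the transformation being guarded (sketch only), index
// splitting turns an iteration-space test inside the body into two loops
// over disjoint ranges:

void split_before(int *a, int n, int m) {
  for (int i = 0; i < n; ++i) {
    if (i < m)
      a[i] += 1;   // taken exactly for i in [0, min(m, n))
    else
      a[i] -= 1;
  }
}

void split_after(int *a, int n, int m) {
  int mid = m < n ? (m < 0 ? 0 : m) : n;  // clamp m into [0, n]
  for (int i = 0; i < mid; ++i)
    a[i] += 1;
  for (int i = mid; i < n; ++i)
    a[i] -= 1;
}
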
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7a4bb35..5004483 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -49,6 +48,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
@@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
L = Lp;
- OrigHeader = L->getHeader();
OrigPreHeader = L->getLoopPreheader();
+ if (!OrigPreHeader) return false;
+
OrigLatch = L->getLoopLatch();
+ if (!OrigLatch) return false;
+
+ OrigHeader = L->getHeader();
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
- assert(OrigHeader && OrigLatch && OrigPreHeader &&
- "Loop is not in canonical form");
-
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
"bb.nph",
OrigHeader->getParent(),
NewHeader);
- LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
if (Loop *PL = LI.getLoopFor(OrigPreHeader))
PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
BranchInst::Create(NewHeader, NewPreHeader);
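
// Loop rotation, in source terms, turns a top-tested loop into a guarded
// bottom-tested one, which is why the pass now simply bails out rather than
// asserting when there is no preheader or latch to anchor the rewrite. An
// illustration (not compiler output):

int sum_before(const int *a, int n) {
  int s = 0, i = 0;
  while (i < n) {      // exit test in the loop header
    s += a[i];
    ++i;
  }
  return s;
}

int sum_after(const int *a, int n) {
  int s = 0, i = 0;
  if (i < n) {         // guard, placed in the new preheader ("bb.nph")
    do {
      s += a[i];
      ++i;
    } while (i < n);   // rotated test in the latch
  }
  return s;
}
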
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e20fb16..564c7ac 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
@@ -131,12 +132,10 @@ namespace {
}
private:
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const * &CondStride);
-
void OptimizeIndvars(Loop *L);
- void OptimizeLoopCountIV(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in a int-to-float cast
@@ -148,8 +147,28 @@ namespace {
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride);
+ const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
IVExpr&, const Type*,
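
// The OptimizeLoopCountIV entry points declared above implement the classic
// count-down rewrite: when the induction variable's only remaining use is
// the exit test, counting toward zero lets the target compare against zero
// (often free off the decrement). In source form, a sketch rather than the
// pass's actual output:

void fill_before(char *p, int n, char v) {
  for (int i = 0; i < n; ++i)  // i is used only by the exit test
    *p++ = v;
}

void fill_after(char *p, int n, char v) {
  for (int i = n; i > 0; --i)  // test against zero instead of n
    *p++ = v;
}
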
@@ -164,6 +183,7 @@ namespace {
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
@@ -188,9 +208,7 @@ namespace {
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
+
void DeleteTriviallyDeadInstructions();
};
}
@@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
-
+
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
-
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
DeadInsts.push_back(U);
}
}
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
@@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
@@ -328,7 +346,7 @@ namespace {
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -352,11 +370,11 @@ namespace {
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -367,8 +385,8 @@ namespace {
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -383,7 +401,7 @@ void BasedUser::dump() const {
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
Instruction *BaseInsertPt = IP;
-
+
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
+
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
@@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
-
+
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
const SCEV *NewValSCEV = SE->getUnknown(Base);
@@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
@@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
+ // artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
@@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
@@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
@@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
@@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
@@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
@@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
@@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
@@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
@@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
@@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
@@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
@@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
@@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
@@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
@@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
@@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
continue;
+ // The other stride has no uses, don't reuse it.
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (UI->second->Users.empty())
+ continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
@@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
@@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-/// of the strided accesses, as well as the old information from Uses. We
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
@@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
@@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
       // the store is through the expression, not of the expression, though.
@@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
@@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
@@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
return true;
}
-/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
+void
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
@@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
@@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
- /// Choose a strength-reduction strategy and prepare for it by creating
- /// the necessary PHIs and adjusting the bookkeeping.
+ // Choose a strength-reduction strategy and prepare for it by creating
+ // the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
@@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
@@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
@@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
@@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// different starting values, into different PHIs.
}
+void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ }
+}
+
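The determinism comment above is the heart of this refactoring: a std::map keyed by SCEV pointers iterates in pointer order, which can differ from run to run, so the pass walks the insertion-ordered StrideOrder vector and looks each stride up in the map. A minimal standalone sketch of the same idiom, with hypothetical names and no LLVM types:

    #include <cstdio>
    #include <map>
    #include <vector>

    int main() {
      std::map<const int *, int> UsesByStride; // pointer-keyed: unstable order
      std::vector<const int *> StrideOrder;    // remembers insertion order

      static const int Strides[] = {4, 8, 1};
      for (const int &S : Strides) {
        UsesByStride[&S] = S;
        StrideOrder.push_back(&S);
      }

      // Deterministic: iterate in insertion order, not pointer order.
      for (const int *Key : StrideOrder)
        std::printf("stride %d\n", UsesByStride.find(Key)->second);
      return 0;
    }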
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride) {
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse,
+ const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
- CondStride = &SI->first;
+ CondStride = SI->first;
return true;
}
}
return false;
-}
+}
namespace {
  // Constant strides come first, which in turn are sorted by their absolute
@@ -1851,8 +1894,9 @@ namespace {
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const* &CondStride) {
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
@@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(*CondStride);
- if (I == IU->IVUsesByStride.end() ||
- I->second->Users.size() != 1)
+ IU->IVUsesByStride.find(CondStride);
+ if (I == IU->IVUsesByStride.end())
return Cond;
+ if (I->second->Users.size() > 1) {
+ for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
+ EE = I->second->Users.end(); II != EE; ++II) {
+ if (II->getUser() == Cond)
+ continue;
+ if (!isInstructionTriviallyDead(II->getUser()))
+ return Cond;
+ }
+ }
// Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV **NewStride = NULL;
+ const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
@@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return Cond;
+
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
     // Check the stride constant and the comparison constant signs to detect
// overflow.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
+ if (StartC) {
+ if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
+ (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
+ return Cond;
+ } else {
+ // More restrictive check for the other cases.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+ }
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first))
+ if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
@@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
+
+      // If the old icmp value fits in the icmp immediate field but the new
+      // one doesn't, try something else.
+ if (TLI &&
+ TLI->isLegalICmpImmediate(CmpVal) &&
+ !TLI->isLegalICmpImmediate(NewCmpVal))
+ continue;
+
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
@@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
- if (NewCmpVal == CmpVal)
- continue;
       // Pick the best iv to use, trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
@@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
+ if (!PostPass) {
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+ }
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &IU->StrideOrder[i];
+ NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
@@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
+ DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(errs() << " to " << *Cond << '\n');
+
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
@@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
       /* If the shadow use is an int->float cast then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
@@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
@@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
@@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
@@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
OptimizeShadowIV(L);
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
+ bool CheckPreInc) {
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ const SCEV *Share = SI->first;
+ if (!isa<SCEVConstant>(SI->first) || Share == Stride)
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return true; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
+ if (!CheckPreInc)
+ return true;
+ // Any pre-inc iv use?
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (!I->isUseOfPostIncrementedValue())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
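For intuition about StrideMightBeShared: a constant stride may end up shared when another constant stride is an exact multiple of it and the multiple can be folded as an addressing-mode scale. A hedged source-level sketch; whether the scale actually folds is target-dependent, and the names are illustrative only:

    // Strides of 4 and 8 bytes per iteration: with a scaled addressing mode
    // such as [base + index*8], the second access can reuse the 4-byte IV
    // with scale 2, so the pass must assume these strides may be shared.
    void zero_both(int *a, long long *b, int n) {
      for (int i = 0; i != n; ++i) {
        a[i] = 0;  // address advances 4 bytes per iteration
        b[i] = 0;  // advances 8 bytes, an exact multiple of the other stride
      }
    }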
+/// isUsedByExitBranch - Return true if the icmp is used by a loop-terminating
+/// conditional branch, either directly or after being and'ed / or'ed with
+/// other conditions.
+static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
+ BasicBlock *CondBB = Cond->getParent();
+ if (!L->isLoopExiting(CondBB))
+ return false;
+ BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
+ if (!TermBr || !TermBr->isConditional())
+ return false;
+
+ Value *User = *Cond->use_begin();
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ while (UserInst &&
+ (UserInst->getOpcode() == Instruction::And ||
+ UserInst->getOpcode() == Instruction::Or)) {
+ if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
+ return false;
+ User = *User->use_begin();
+ UserInst = dyn_cast<Instruction>(User);
+ }
+ return User == TermBr;
+}
+
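The single-use and/or walk above corresponds to loops whose exit test is a conjunction or disjunction that has been folded into one block. A hedged C-level example of the accepted shape, assuming SimplifyCFG has merged the short-circuit into a single 'and' in the exiting block:

    // The (i != n) icmp feeds an 'and' whose single use is the exiting
    // branch, so isUsedByExitBranch still treats it as the loop's
    // terminating compare.
    int find_zero(const int *p, int n) {
      int i = 0;
      while (i != n && p[i] != 0)
        ++i;
      return i;
    }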
+static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI = 0) {
+ if (!L->contains(Cond->getParent()))
+ return false;
+
+ if (!isa<SCEVConstant>(CondUse->getOffset()))
+ return false;
+
+ // Handle only tests for equality for the moment.
+ if (!Cond->isEquality() || !Cond->hasOneUse())
+ return false;
+ if (!isUsedByExitBranch(Cond, L))
+ return false;
+
+ Value *CondOp0 = Cond->getOperand(0);
+ const SCEV *IV = SE->getSCEV(CondOp0);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!SC || SC->getValue()->getSExtValue() < 0)
+ // If it's already counting down, don't do anything.
+ return false;
+
+  // If the RHS of the comparison is not loop invariant, the rewrite
+  // cannot be done. Also bail out if it's already comparing against zero,
+  // or, when we are checking this before the cmp stride optimization, if
+  // it's already comparing against a legal immediate.
+ Value *RHS = Cond->getOperand(1);
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ if (!L->isLoopInvariant(RHS) ||
+ (RHSC && RHSC->isZero()) ||
+ (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
+ return false;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!CondOp0->hasNUses(2))
+ return false;
+
+ return true;
+}
+
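Concretely, ShouldCountToZero accepts an equality-tested IV with a positive constant step, a loop-invariant bound that is neither zero nor an already legal immediate, and no uses beyond the increment and the compare. A hedged example of a qualifying loop, with illustrative names:

    // 'i' exists only to count: its uses are the increment and the equality
    // test, the step (+1) is a positive constant, and 'n' is loop invariant.
    // This is the shape the count-down rewrite targets.
    void spin(volatile int *flag, int n) {
      for (int i = 0; i != n; ++i)
        *flag = 1;
    }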
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *const *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return; // setcc doesn't use the IV.
-
- if (ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- return;
-
- // If exiting block is the latch block, we know it's safe and profitable to
- // transform the icmp to use post-inc iv. Otherwise do so only if it would
- // not reuse another iv and its iv would be reused by other uses. We are
- // optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue())
- return;
- }
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- // FIXME: This is expensive, and worse still ChangeCompareStride does a
- // similar check. Can we perform all the icmp related transformations after
- // StrengthReduceStridedIVUsers?
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
- ++NewStride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
- continue;
- int64_t SSInt =
- cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- return;
- }
- }
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- StrideNoReuse.insert(*CondStride);
- }
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+    // FIXME: Overly conservative; termination condition could be an 'or', etc.
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride.
- if (ExitingBlock == LatchBlock)
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
-
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
+ }
+ }
+
+    // If the iv for the stride might be shared and any of its users might
+    // use the pre-inc iv, then it's not safe to use the post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+      // If the condition stride is a constant and the compare is its only
+      // use, we might want to optimize it first by making it count toward
+      // zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ }
- ++NumLoopCond;
-}
+ if (!UsePostInc)
+ continue;
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // If the number of times the loop is executed isn't computable, give up.
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace());
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
+ }
- // Get the terminating condition for the loop if possible (this isn't
- // necessarily in the latch, or a block that's a predecessor of the header).
- if (!L->getExitBlock())
- return; // More than one loop exit blocks.
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return; // More than one block exiting!
+ ++NumLoopCond;
+ }
+}
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (TermBr == 0) return;
- assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
- if (!isa<ICmpInst>(TermBr->getCondition()))
- return;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse,
+ Loop *L) {
+  // If the only use is an icmp feeding a loop-exiting conditional branch,
+  // then attempt the optimization.
+ BasedUser User = BasedUser(*CondUse, SE);
+ assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
+ ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+
+ // Less strict check now that compare stride optimization is done.
+ if (!ShouldCountToZero(Cond, CondUse, SE, L))
+ return false;
- // Handle only tests for equality for the moment, and only stride 1.
- if (Cond->getPredicate() != CmpInst::ICMP_EQ)
- return;
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
- if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
- return;
- // If the RHS of the comparison is defined inside the loop, the rewrite
- // cannot be done.
- if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
- if (L->contains(CR->getParent()))
- return;
+ Value *CondOp0 = Cond->getOperand(0);
+ PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
+ Instruction *Incr;
+ if (!PHIExpr) {
+ // Value tested is postinc. Find the phi node.
+ Incr = dyn_cast<BinaryOperator>(CondOp0);
+ // FIXME: Just use User.OperandValToReplace here?
+ if (!Incr || Incr->getOpcode() != Instruction::Add)
+ return false;
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!Cond->getOperand(0)->hasNUses(2))
- return;
- PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
- Instruction *incr;
- if (phi && phi->getParent()==L->getHeader()) {
- // value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = phi->use_begin();
- incr = dyn_cast<BinaryOperator>(UI);
- if (!incr)
- incr = dyn_cast<BinaryOperator>(++UI);
- // 1 use for postinc value, the phi. Unnecessarily conservative?
- if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
- return;
- } else {
- // Value tested is postinc. Find the phi node.
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
- if (!incr || incr->getOpcode()!=Instruction::Add)
- return;
-
- Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
- phi = dyn_cast<PHINode>(UI);
- if (!phi)
- phi = dyn_cast<PHINode>(++UI);
+ PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
+ if (!PHIExpr)
+ return false;
// 1 use for preinc value, the increment.
- if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
- return;
+ if (!PHIExpr->hasOneUse())
+ return false;
+ } else {
+ assert(isa<PHINode>(CondOp0) &&
+ "Unexpected loop exiting counting instruction sequence!");
+ PHIExpr = cast<PHINode>(CondOp0);
+ // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = PHIExpr->use_begin();
+ Incr = dyn_cast<BinaryOperator>(UI);
+ if (!Incr)
+ Incr = dyn_cast<BinaryOperator>(++UI);
+ // One use for postinc value, the phi. Unnecessarily conservative?
+ if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
+ return false;
}
// Replace the increment with a decrement.
- BinaryOperator *decr =
- BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
- incr->getOperand(1), "tmp", incr);
- incr->replaceAllUsesWith(decr);
- incr->eraseFromParent();
+ DEBUG(errs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
+ DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
+ Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
+ Incr->replaceAllUsesWith(Decr);
+ Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
- int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
- Value *startVal = phi->getIncomingValue(inBlock);
- Value *endVal = Cond->getOperand(1);
- // FIXME check for case where both are constant
+ unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
+ Value *StartVal = PHIExpr->getIncomingValue(InBlock);
+ Value *EndVal = Cond->getOperand(1);
+ DEBUG(errs() << " Optimize loop counting iv to count down ["
+ << *EndVal << " .. " << *StartVal << "]\n");
+
+ // FIXME: check for case where both are constant.
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal =
- BinaryOperator::Create(Instruction::Sub, endVal, startVal,
- "tmp", PreInsertPt);
- phi->setIncomingValue(inBlock, NewStartVal);
+ BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
+ EndVal, StartVal, "tmp", PreInsertPt);
+ PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
+ DEBUG(errs() << " New icmp: " << *Cond << "\n");
+
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ const SCEV *NewStride = 0;
+ bool Found = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *OldStride = IU->StrideOrder[i];
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
+ if (SC->getValue()->getSExtValue() == -SInt) {
+ Found = true;
+ NewStride = OldStride;
+ break;
+ }
+ }
+
+ if (!Found)
+ NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
+ IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
+ IU->IVUsesByStride[Stride]->removeUser(CondUse);
+
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
+ Stride = NewStride;
- Changed = true;
+ ++NumCountZero;
+
+ return true;
}
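The substitution just performed (start becomes end - start, the bound becomes zero, and the increment is flipped to a decrement) amounts to the source-level rewrite below. Since only equality is tested, the trip count is preserved even if the arithmetic wraps. A small self-checking sketch under that assumption:

    #include <cassert>

    unsigned trips_up(unsigned start, unsigned end) {
      unsigned t = 0;
      for (unsigned i = start; i != end; ++i) ++t;  // original form
      return t;
    }

    unsigned trips_down(unsigned start, unsigned end) {
      unsigned t = 0;
      for (unsigned j = end - start; j != 0; --j) ++t;  // test against zero
      return t;
    }

    int main() {
      assert(trips_up(3, 11) == trips_down(3, 11));  // 8 iterations each
      return 0;
    }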
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
+bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ bool ThisChanged = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *Stride = IU->StrideOrder[i];
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(Stride);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ // If stride is a constant and it has an icmpinst use, check if we can
+ // optimize the loop to count down.
+ if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
+ Instruction *User = SI->second->Users.begin()->getUser();
+ if (!isa<ICmpInst>(User))
+ continue;
+ const SCEV *CondStride = Stride;
+ IVStrideUse *Use = &*SI->second->Users.begin();
+ if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
+ continue;
+ ThisChanged = true;
+ // Now check if it's possible to reuse this iv for other stride uses.
+ for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
+ const SCEV *SStride = IU->StrideOrder[j];
+ if (SStride == CondStride)
+ continue;
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
+ IU->IVUsesByStride.find(SStride);
+ assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SII->first->isLoopInvariant(L))
+ continue;
+ // FIXME: Rewrite other stride using CondStride.
+ }
+ }
+ }
+
+ Changed |= ThisChanged;
+ return ThisChanged;
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->getLoopPreheader() || !L->getLoopLatch())
+ return false;
+
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
@@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceStridedIVUsers?
+ // StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
@@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceStridedIVUsers have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
- }
- }
+ StrengthReduceIVUsers(L);
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+ }
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
deleted file mode 100644
index 837ec59..0000000
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements a simple loop unroller. It works best when loops have
-// been canonicalized by the -indvars pass, allowing it to determine the trip
-// counts of loops easily.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <climits>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
-
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
-
-static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
-
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
-
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
- };
-}
-
-char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
-
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
-
-/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L) {
- unsigned Size = 0;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- Instruction *Term = BB->getTerminator();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<PHINode>(I) && BB == L->getHeader()) {
- // Ignore PHI nodes in the header.
- } else if (I->hasOneUse() && I->use_back() == Term) {
- // Ignore instructions only used by the loop terminator.
- } else if (isa<DbgInfoIntrinsic>(I)) {
- // Ignore debug instructions
- } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
- // Ignore GEP as they generally are subsumed into a load or store.
- } else if (isa<CallInst>(I)) {
- // Estimate size overhead introduced by call instructions which
- // is higher than other instructions. Here 3 and 10 are magic
- // numbers that help one isolated test case from PR2067 without
- // negatively impacting measured benchmarks.
- Size += isa<IntrinsicInst>(I) ? 3 : 10;
- } else {
- ++Size;
- }
-
- // TODO: Ignore expressions derived from PHI and constants if inval of phi
- // is a constant, or if operation is associative. This will get induction
- // variables.
- }
- }
-
- return Size;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- assert(L->isLCSSAForm());
- LoopInfo *LI = &getAnalysis<LoopInfo>();
-
- BasicBlock *Header = L->getHeader();
- DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
- (void)Header;
-
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
-
- // Automatically select an unroll count.
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
- }
-
- // Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned LoopSize = ApproximateLoopSize(L);
- DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
- DEBUG(errs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
- if (!UnrollAllowPartial) {
- DEBUG(errs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
- }
- if (Count < 2) {
- DEBUG(errs() << " could not unroll partially\n");
- return false;
- }
- DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
- }
- }
-
- // Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
- return false;
-
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
- return true;
-}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index c7b00da..38d267a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
Function *F = loopHeader->getParent();
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
if (!IsTrivialUnswitchCondition(LoopCond)) {
@@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.pop_back();
// Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
ReplaceUsesOfWith(I, C, Worklist, L, LPM);
continue;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index af29f97..8466918 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -27,7 +27,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
@@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::vector<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
- LLVMContext &Context = I->getContext();
// First step, linearize the expression if it is in ((A+B)+(C+D)) form.
BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
// transform them into multiplies by -1 so they can be reassociated.
if (I->getOpcode() == Instruction::Mul) {
if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
- LHS = LowerNegateToMultiply(cast<Instruction>(LHS),
- ValueRankMap, Context);
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
LHSBO = isReassociableOp(LHS, Opcode);
}
if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
- RHS = LowerNegateToMultiply(cast<Instruction>(RHS),
- ValueRankMap, Context);
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
RHSBO = isReassociableOp(RHS, Opcode);
}
}
@@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
-static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
+static Value *NegateValue(Value *V, Instruction *BI) {
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
- I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
@@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
+static Instruction *BreakUpSubtract(Instruction *Sub,
std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction... so that sub
// instructions can be commuted with other add instructions...
@@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction...
// and set it as the RHS of the add instruction we just made...
//
- Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
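For intuition: turning X - Y into X + (0 - Y) converts the expression into a pure add tree, so reassociation can commute and regroup the operands freely. A hedged example of the payoff, with illustrative names:

    // (x - y) + z cannot be reassociated directly, but after BreakUpSubtract
    // it becomes x + (-y) + z, an add tree whose operands can be reordered,
    // for example to expose (x + z) as a common subexpression.
    int combine(int x, int y, int z) {
      return (x - y) + z;
    }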
@@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
@@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
/// ReassociateBB - Inspect all of the instructions in this basic block,
/// reassociating them as we go.
void Reassociate::ReassociateBB(BasicBlock *BB) {
- LLVMContext &Context = BB->getContext();
-
for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
Instruction *BI = BBI++;
if (BI->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) {
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
MadeChange = true;
BI = NI;
}
@@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(Context, BI)) {
- BI = BreakUpSubtract(Context, BI, ValueRankMap);
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
(!BI->hasOneUse() ||
!isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap, Context);
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
}
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 509a6db..c202a2c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- unsigned i = *EVI.idx_begin();
- LatticeVal EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ if (isa<StructType>(AggVal->getType())) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
}
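Background for this change: extractvalue can index into an array aggregate as well as a struct, and the solver only keeps per-field lattice values for structs, so the array case must be conservatively marked overdefined. A hedged C++ illustration of the struct shape SCCP can still track (whether the front end emits an extractvalue here depends on the ABI lowering):

    // mk() returns a first-class struct; reading .a typically lowers to an
    // extractvalue with a struct operand, which SCCP tracks per field and
    // can fold to the constant 1. An array-typed aggregate would instead
    // take the new 'else' branch and go overdefined.
    struct Pair { int a, b; };
    static Pair mk() { return Pair{1, 2}; }
    int first() { return mk().a; }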
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index c047fca..001267a 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) {
}
}
- // FIXME: This code is commented out for now, because it can lead to the
- // insertion of a lot of redundant PHIs being inserted by SSAUpdater.
-#if 0
// Perform a forward data-flow to compute availability at all points on
// the CFG.
do {
@@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) {
CurInst->eraseFromParent();
}
}
-#endif
VT.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 5669da0..b54565c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
-void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCondPropagationPass());
-}
-
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 575c93b..611505e 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ public:
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
- void EmitPutChar(Value *Char, IRBuilder<> &B);
+ Value *EmitPutChar(Value *Char, IRBuilder<> &B);
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
@@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
+Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context), NULL);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
Type::getInt32Ty(*Context),
+ /*isSigned*/true,
"chari"),
"putchar");
if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
+ return CI;
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context),
File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
+ Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
+ "chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization {
return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
}
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
return 0;
}
};
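The folded value is whatever the host memcmp returns for the two constant strings, guarded by the bounds check above. A minimal standalone illustration of when the fold applies (hypothetical calls, standard headers only):

#include <cassert>
#include <cstring>

int main() {
  // All operands constant and Len within both strings: the call may be
  // replaced by the compile-time result (here, something negative).
  assert(std::memcmp("abc", "abd", 3) < 0);
  // With Len = 4 exceeding strlen("abc"), the new code declines to fold
  // rather than read past the constant data.
  return 0;
}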
@@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'.
+ // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
+ // in case there is an error writing to stdout.
if (FormatStr.size() == 1) {
- EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
+ FormatStr[0]), B);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("foo\n") --> puts("foo")
@@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
- EmitPutChar(CI->getOperand(2), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(CI->getOperand(2), B);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
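Source-level shape of the rewrite (illustrative only): since putchar can fail, its result, cast to printf's return type, now flows to any user of the printf result instead of a hard-coded 1.

#include <cstdio>

// What printf("x") lowers to after this change when its result is used:
// the putchar return value (EOF on a write error) is forwarded.
int emitX() {
  return std::putchar('x');
}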
@@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
-// memcmp:
-// * memcmp(x,y,l) -> cnst
-// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
-//
// pow, powf, powl:
// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 4864e23..b06ae3d 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Inst->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b56e170..4119cb9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -25,7 +25,7 @@
// unlikely, that the return returns something else (like constant 0), and
// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
// the function return the exact same value.
-// 4. If it can prove that callees do not access theier caller stack frame,
+// 4. If it can prove that callees do not access their caller stack frame,
// they are marked as eligible for tail call elimination (by the code
// generator).
//
@@ -58,6 +58,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -75,7 +76,7 @@ namespace {
private:
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
@@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
-
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
return true;
}
-/// FunctionContainsAllocas - Scan the specified basic block for alloca
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
/// instructions. If it contains any that might be accessed by calls, return
/// true.
static bool CheckForEscapingAllocas(BasicBlock *BB,
@@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) {
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
- std::vector<PHINode*> ArgumentPHIs;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
bool FunctionContainsEscapingAllocas = false;
@@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
/// happen. This bug is PR962.
if (FunctionContainsEscapingAllocas)
return false;
-
// Second pass, change any tail calls to loops.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
- if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
@@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// We currently handle static constants and arguments that are not modified as
// part of the recursion.
//
-static bool isDynamicConstant(Value *V, CallInst *CI) {
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
// Check to see if this is an immutable argument, if so, the value
@@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) {
if (CI->getOperand(ArgNo+1) == Arg)
return true;
}
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
// Not a constant or immutable argument, we can't safely transform.
return false;
}
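A sketch of the kind of function the new check enables (not taken from the LLVM test suite; assumes callers pass n >= 1): the 'return n' below sits in a block reachable only from the 'case 1' arm of a switch on n, so it is effectively constant there and no longer blocks the transformation.

// 'return n' executes only when n == 1, so its value can be proven
// constant at entry and accumulator-style elimination can still fire
// on the recursive return.
long factorial(long n) {
  switch (n) {
  case 1:
    return n;
  default:
    return n * factorial(n - 1);
  }
}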
@@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
Function *F = TheRI->getParent()->getParent();
Value *ReturnedValue = 0;
- // TODO: Handle multiple value ret instructions;
- if (isa<StructType>(F->getReturnType()))
- return 0;
-
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
if (RI != TheRI) {
@@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
// evaluatable at the start of the initial invocation of the function,
// instead of at the end of the evaluation.
//
- if (!isDynamicConstant(RetOp, CI))
+ if (!isDynamicConstant(RetOp, CI, RI))
return 0;
if (ReturnedValue && RetOp != ReturnedValue)
@@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index c728c0b..2974592 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -275,8 +275,6 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
- assert(!isa<IndirectBrInst>(BB->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
TerminatorInst *LatchTerm = BB->getTerminator();
unsigned SuccNum = 0;
#ifndef NDEBUG
@@ -386,6 +384,12 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
if (LI) {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index fd8862c..162d7b3 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -322,8 +323,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
/// mapping its operands through ValueMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
- LLVMContext &Context = I->getContext();
-
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
@@ -333,9 +332,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Ops[0], Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -346,7 +344,28 @@ ConstantFoldMappedInstruction(const Instruction *I) {
CE);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
- Ops.size(), Context, TD);
+ Ops.size(), TD);
+}
+
+static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD,
+ LLVMContext &Context) {
+ DILocation ILoc(InsnMD);
+ if (ILoc.isNull()) return InsnMD;
+
+ DILocation CallLoc(TheCallMD);
+ if (CallLoc.isNull()) return InsnMD;
+
+ DILocation OrigLocation = ILoc.getOrigLocation();
+ MDNode *NewLoc = TheCallMD;
+ if (!OrigLocation.isNull())
+ NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context);
+
+ SmallVector<Value *, 4> MDVs;
+ MDVs.push_back(InsnMD->getElement(0)); // Line
+ MDVs.push_back(InsnMD->getElement(1)); // Col
+ MDVs.push_back(InsnMD->getElement(2)); // Scope
+ MDVs.push_back(NewLoc);
+ return MDNode::get(Context, MDVs.data(), MDVs.size());
}
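The recursion above splices the call site's location onto the end of an instruction's inlined-at chain. A toy model of that walk over a plain struct (the real code rebuilds "dbg" MDNodes of the form {line, col, scope, inlined-at}; everything here is invented for illustration):

#include <deque>

struct Loc {
  int Line, Col;
  const Loc *InlinedAt;   // null when the location is not inlined
};

// Rebuild L with CallLoc appended at the end of its inlined-at chain.
// Pool is a std::deque so existing nodes stay pinned as it grows.
const Loc *updateInlinedAt(const Loc &L, const Loc &CallLoc,
                           std::deque<Loc> &Pool) {
  const Loc *Tail = L.InlinedAt
                        ? updateInlinedAt(*L.InlinedAt, CallLoc, Pool)
                        : &CallLoc;
  Pool.push_back(Loc{L.Line, L.Col, Tail});
  return &Pool.back();
}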
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -361,7 +380,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const TargetData *TD) {
+ const TargetData *TD,
+ Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -400,19 +420,52 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// references as we go. This uses ValueMap to do all the hard work.
//
BasicBlock::iterator I = NewBB->begin();
+
+ LLVMContext &Context = OldFunc->getContext();
+ unsigned DbgKind = Context.getMetadata().getMDKind("dbg");
+ MDNode *TheCallMD = NULL;
+ SmallVector<Value *, 4> MDVs;
+ if (TheCall && TheCall->hasMetadata())
+ TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Skip over all PHI nodes, remembering them for later.
BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
}
// Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I)
+ for (; I != NewBB->end(); ++I) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
RemapInstruction(I, ValueMap);
+ }
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 20f5a4a..043046c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -386,7 +386,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
- &InlinedFunctionInfo, TD);
+ &InlinedFunctionInfo, TD, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 56e662e..590d667 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -50,7 +50,6 @@ namespace {
LCSSA() : LoopPass(&ID) {}
// Cached analysis information for the current function.
- LoopInfo *LI;
DominatorTree *DT;
std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
@@ -64,6 +63,9 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+
+ // LCSSA doesn't actually require LoopSimplify, but the PassManager
+ // doesn't know how to schedule LoopSimplify by itself.
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredTransitive<LoopInfo>();
@@ -121,7 +123,6 @@ static bool BlockDominatesAnExit(BasicBlock *BB,
bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
L = TheLoop;
- LI = &LPM.getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
// Get the set of exiting blocks.
@@ -216,7 +217,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
SSAUpdate.Initialize(Inst);
// Insert the LCSSA phi's into all of the exit blocks dominated by the
- // value., and add them to the Phi's map.
+ // value, and add them to the Phi's map.
for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
@@ -230,8 +231,17 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
// Add inputs from inside the loop for this PHI.
- for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
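For orientation, the invariant LCSSA maintains, sketched at the source level (names invented; 'v_out' stands in for the exit-block PHI). The hunk above covers the extra wrinkle where an exit block also has predecessors outside the loop, whose incoming values must be rewritten through a different LCSSA PHI.

int lcssaShape(int n) {
  int v = 0;
  for (int i = 0; i < n; ++i)
    v = i * 2;            // 'v' is defined inside the loop
  int v_out = v;          // models the LCSSA PHI in the exit block
  return v_out + 1;       // out-of-loop uses go through that PHI
}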
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 543ddf1..aef0f5f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -24,10 +24,14 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -236,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
-// Local dead code elimination...
+// Local dead code elimination.
//
/// isInstructionTriviallyDead - Return true if the result produced by the
@@ -248,6 +252,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// We don't want debug info removed by anything this general.
if (isa<DbgInfoIntrinsic>(I)) return false;
+ // Likewise for memory use markers.
+ if (isa<MemoryUseIntrinsic>(I)) return false;
+
if (!I->mayHaveSideEffects()) return true;
// Special case intrinsics that "may have side effects" but can be deleted
@@ -323,9 +330,53 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
}
//===----------------------------------------------------------------------===//
-// Control Flow Graph Restructuring...
+// Control Flow Graph Restructuring.
//
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = PN->hasConstantValue();
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "hasConstantValue broken");
+
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt == 0) PhiIt = &BB->front();
+ }
+}
+
+
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
@@ -362,6 +413,174 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->eraseFromParent();
}
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI)
+ if (BBPreds.count(*PI))
+ CommonPreds.insert(*PI);
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential debug intrinsics and the branch. If possible, eliminate BB by
+/// rewriting all the predecessors to branch to the successor block and return
+/// true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+ Succ->getInstList().splice(Succ->begin(),
+ BB->getInstList(), BB->begin());
+ } else {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+
+
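Source-level picture of the fold (hypothetical control flow; 'fwd' is the almost-empty block holding nothing but an unconditional branch):

int foldExample(bool c, int a) {
  int r = 0;
  if (c) goto fwd;    // predecessor 1 of 'fwd'
  r = a;              // falls through: predecessor 2 of 'fwd'
fwd:
  goto merge;         // 'fwd' is only this branch, so both predecessors
merge:                // are retargeted at 'merge' and 'fwd' is deleted
  return r;
}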
/// OnlyUsedByDbgIntrinsics - Return true if the instruction I is only used
/// by DbgIntrinsics. If DbgInUses is specified then the vector is filled
/// with the DbgInfoIntrinsic that use the instruction I.
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cd8d952..2ab0972 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -23,6 +23,11 @@
//
// This pass also guarantees that loops will have exactly one backedge.
//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
// Note that the simplifycfg pass will clean up blocks which are split out but
// end up being unnecessary, so usage of this pass should not pessimize
// generated code.
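A minimal sketch of the client-side check this comment calls for, using the Loop interface from llvm/Analysis/LoopInfo.h (both accessors return null when the corresponding guarantee could not be established):

#include "llvm/Analysis/LoopInfo.h"

// True only when LoopSimplify managed to give L a preheader and a single
// backedge; with indirectbr in play, either insertion can fail.
static bool hasSimplifiedForm(llvm::Loop *L) {
  return L->getLoopPreheader() && L->getLoopLatch();
}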
@@ -81,17 +86,15 @@ namespace {
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
- /// verifyAnalysis() - Verify loop nest.
- void verifyAnalysis() const {
- assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
- }
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
- void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
@@ -160,8 +163,10 @@ ReprocessLoop:
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L);
- NumInserted++;
- Changed = true;
+ if (Preheader) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Next, check to make sure that all exit nodes of the loop only have
@@ -180,21 +185,22 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- RewriteLoopExitBlock(L, ExitBlock);
- NumInserted++;
- Changed = true;
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ NumInserted++;
+ Changed = true;
+ }
break;
}
}
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
- unsigned NumBackedges = L->getNumBackEdges();
- if (NumBackedges != 1) {
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
// If this is really a nested loop, rip it out into a child loop. Don't do
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
- if (NumBackedges < 8) {
+ if (L->getNumBackEdges() < 8) {
if (SeparateNestedLoop(L, LPM)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
@@ -207,9 +213,11 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- InsertUniqueBackedgeBlock(L, Preheader);
- NumInserted++;
- Changed = true;
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Scan over the PHI nodes in the loop header. Since they now have only two
@@ -233,7 +241,14 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
@@ -251,7 +266,8 @@ ReprocessLoop:
Instruction *Inst = I++;
if (Inst == CI)
continue;
- if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
AllInvariant = false;
break;
}
@@ -303,8 +319,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
PI != PE; ++PI)
- if (!L->contains(*PI)) // Coming in from outside the loop?
- OutsideBlocks.push_back(*PI); // Keep track of it...
+ if (!L->contains(*PI)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(*PI);
+ }
// Split out the loop pre-header.
BasicBlock *NewBB =
@@ -324,8 +347,12 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
- if (L->contains(*I))
+ if (L->contains(*I)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>((*I)->getTerminator())) return 0;
+
LoopBlocks.push_back(*I);
+ }
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
@@ -519,13 +546,18 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
/// backedges to target a new basic block and have that block branch to the loop
/// header. This ensures that loops have exactly one backedge.
///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
BasicBlock *Header = L->getHeader();
Function *F = Header->getParent();
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
@@ -612,4 +644,40 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
DT->splitBlock(BEBlock);
if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
DF->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that an indirectbr is nearby whenever there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d68427a..6232f32 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -108,8 +108,19 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
assert(L->isLCSSAForm());
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!BI || BI->isUnconditional()) {
@@ -351,8 +362,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Header->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
(*BB)->getInstList().erase(Inst);
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 8e1fb98..8dbc808 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -78,166 +78,6 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
-///
-/// Assumption: Succ is the single successor for BB.
-///
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
- assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
-
- DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
- << Succ->getName() << "\n");
- // Shortcut, if there is only a single predecessor it must be BB and merging
- // is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI)
- if (BBPreds.count(*PI))
- CommonPreds.insert(*PI);
-
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
- // Look at all the phi nodes in Succ, to see if they present a conflict when
- // merging these blocks
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // If the incoming value from BB is again a PHINode in
- // BB which has the same incoming value for *PI as PN does, we can
- // merge the phi nodes and then the blocks can still be merged
- PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
- if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
- << BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
- return false;
- }
- }
- } else {
- Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- // See if the incoming value for the common predecessor is equal to the
- // one for BB, in which case this phi node will not prevent the merging
- // of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional
-/// branch to Succ, and contains no instructions other than PHI nodes and the
-/// branch. If possible, eliminate BB.
-static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- BasicBlock *Succ) {
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
- // Check for cases where Succ has multiple predecessors and a PHI node in BB
- // has uses which will not disappear when the PHI nodes are merged. It is
- // possible to handle such cases, but difficult: it requires checking whether
- // BB dominates Succ, which is non-trivial to calculate in the case where
- // Succ has multiple predecessors. Also, it requires checking whether
- // constructing the necessary self-referential PHI node doesn't intoduce any
- // conflicts; this isn't too difficult, but the previous code for doing this
- // was incorrect.
- //
- // Note that if this check finds a live use, BB dominates Succ, so BB is
- // something like a loop pre-header (or rarely, a part of an irreducible CFG);
- // folding the branch isn't profitable in that case anyway.
- if (!Succ->getSinglePredecessor()) {
- BasicBlock::iterator BBI = BB->begin();
- while (isa<PHINode>(*BBI)) {
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
- if (PN->getIncomingBlock(UI) != BB)
- return false;
- } else {
- return false;
- }
- }
- ++BBI;
- }
- }
-
- DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
-
- if (isa<PHINode>(Succ->begin())) {
- // If there is more than one pred of succ, and there are PHI nodes in
- // the successor, then we need to add incoming edges for the PHI nodes
- //
- const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
- // Loop over all of the PHI nodes in the successor of BB.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- PN->addIncoming(OldValPN->getIncomingValue(i),
- OldValPN->getIncomingBlock(i));
- } else {
- // Add an incoming value for each of the new incoming values.
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
- PN->addIncoming(OldVal, BBPreds[i]);
- }
- }
- }
-
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
- Succ->getInstList().splice(Succ->begin(),
- BB->getInstList(), BB->begin());
- } else {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
- assert(PN->use_empty() && "There shouldn't be any uses here!");
- PN->eraseFromParent();
- }
- }
-
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
- BB->eraseFromParent(); // Delete the old basic block.
- return true;
-}
/// GetIfCondition - Given a basic block (BB) with two predecessors (and
/// presumably PHI nodes in it), check to see if the merge at this block is due
@@ -1217,7 +1057,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
}
// Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(N)) {
TranslateMap[BBI] = C;
delete N; // Constant folded away, don't need actual inst
} else {
@@ -1983,13 +1823,11 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (BI->isUnconditional()) {
BasicBlock::iterator BBI = BB->getFirstNonPHI();
- BasicBlock *Succ = BI->getSuccessor(0);
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
- if (BBI->isTerminator() && // Terminator is the only non-phi instruction!
- Succ != BB) // Don't hurt infinite loops!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ))
+ if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
} else { // Conditional branch