summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h1
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp148
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp60
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp82
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp141
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp192
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp65
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp683
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp391
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp101
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp21
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp514
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp348
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp730
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VecUtils.h164
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp7
41 files changed, 3231 insertions, 1056 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8336d3a..a7bf188 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
- for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
- dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
- UsedValues.insert(GV);
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ GlobalValue *GV = cast<GlobalValue>(Operand);
+ UsedValues.insert(GV);
+ }
}
// True if A is better than B.
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index dc99492..201f320 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -42,6 +42,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+ SmallPtrSet<Constant *, 8> SeenConstants;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
+ SeenConstants.clear();
return Changed;
}
@@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GlobalIsNeeded(GV);
-
+
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
- if (Constant *OpC = dyn_cast<Constant>(*I))
- MarkUsedGlobalsAsNeeded(OpC);
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ // If we've already processed this constant there's no need to do it again.
+ Constant *Op = dyn_cast<Constant>(*I);
+ if (Op && SeenConstants.insert(Op))
+ MarkUsedGlobalsAsNeeded(Op);
+ }
}
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index b035a82..0ef900e 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3041,6 +3041,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
+static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) {
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ Use *U = &I.getUse();
+ if (Tried.count(U))
+ continue;
+
+ User *Usr = *I;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr);
+ if (!GV || !GV->hasName()) {
+ Tried.insert(U);
+ return I;
+ }
+
+ StringRef Name = GV->getName();
+ if (Name != "llvm.used" && Name != "llvm.compiler_used") {
+ Tried.insert(U);
+ return I;
+ }
+ }
+ return V->use_end();
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New);
+
+static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ Constant *Op = CA->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = ConstantArray::get(CA->getType(), NewOps);
+ assert(Replacement != CA && "CA didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CA->use_empty())
+ CA->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Constant *Op = CE->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = CE->getWithOperands(NewOps);
+ assert(Replacement != CE && "CE didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CE->use_empty())
+ CE->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To,
+ Use *U) {
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ return replaceUsesOfWithOnConstant(CA, From, To, U);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return replaceUsesOfWithOnConstant(CE, From, To, U);
+ C->replaceUsesOfWithOnConstant(From, To, U);
+ return true;
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) {
+ SmallPtrSet<Use*, 8> Tried;
+ bool Ret = false;
+ for (;;) {
+ Value::use_iterator I = getFirst(Old, Tried);
+ if (I == Old->use_end())
+ break;
+ Use &U = I.getUse();
+
+ // Must handle Constants specially, we cannot call replaceUsesOfWith on a
+ // constant because they are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ Ret = true;
+ }
+ return Ret;
+}
+
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
@@ -3060,11 +3159,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
// Make all users of the alias use the aliasee instead.
- if (!J->use_empty()) {
- J->replaceAllUsesWith(Aliasee);
+ if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) {
++NumAliasesResolved;
Changed = true;
}
+ if (!J->use_empty())
+ continue;
// If the alias is externally visible, we may still be able to simplify it.
if (!J->hasLocalLinkage()) {
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 892100f..4ce749c 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
+/// Returns the type id for a type to be hashed. We turn pointer types into
+/// integers here because the actual compare logic below considers pointers and
+/// integers of the same size as equal.
+static Type::TypeID getTypeIDForHash(Type *Ty) {
+ if (Ty->isPointerTy())
+ return Type::IntegerTyID;
+ return Ty->getTypeID();
+}
+
/// Creates a hash-code for the function which is the same for any two
/// functions that will compare equal, without looking at the instructions
/// inside the function.
@@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) {
ID.AddInteger(F->getCallingConv());
ID.AddBoolean(F->hasGC());
ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
return ID.ComputeHash();
}
@@ -200,8 +209,7 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1,
- Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ Type *RetTy = NewG->getReturnType();
+ if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
+ Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
+ else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
+ Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
+ else
+ Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
}
NewG->copyAttributesFrom(G);
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 027a9f2..986c0b8 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops",
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp",
+ cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive",
+ cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- Vectorize = RunBBVectorization;
+ BBVectorize = RunBBVectorization;
+ SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
}
@@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
- if (Vectorize) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
if (OptLevel > 1 && UseGVNAfterVectorization)
@@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalDCEPass());
}
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+ return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+ return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 5f8681f..3396f79 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
UsedValues.insert(LLVMUsed);
-
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
+
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5..2a36074 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ private:
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
+ Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7595da0..166f8df 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
/// Class representing coefficient of floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of I write this comment, the cost of the default
- /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
/// perform write-merging).
- ///
+ ///
class FAddendCoef {
public:
// The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
//
FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
~FAddendCoef();
-
+
void set(short C) {
assert(!insaneIntVal(C) && "Insane coefficient");
IsFp = false; IntVal = C;
}
-
+
void set(const APFloat& C);
void negate();
-
+
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
Value *getValue(Type *) const;
-
+
// If possible, don't define operator+/operator- etc because these
// operators inevitably call FAddendCoef's constructor which is not cheap.
void operator=(const FAddendCoef &A);
void operator+=(const FAddendCoef &A);
void operator-=(const FAddendCoef &A);
void operator*=(const FAddendCoef &S);
-
+
bool isOne() const { return isInt() && IntVal == 1; }
bool isTwo() const { return isInt() && IntVal == 2; }
bool isMinusOne() const { return isInt() && IntVal == -1; }
bool isMinusTwo() const { return isInt() && IntVal == -2; }
-
+
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
@@ -74,26 +74,28 @@ namespace {
return *getFpValPtr();
}
- APFloat &getFpVal(void)
- { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-
+ APFloat &getFpVal(void) {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
bool isInt() const { return !IsFp; }
// If the coefficient is represented by an integer, promote it to a
- // floating point.
+ // floating point.
void convertToFpType(const fltSemantics &Sem);
// Construct an APFloat from a signed integer.
// TODO: We should get rid of this function when APFloat can be constructed
- // from an *SIGNED* integer.
+ // from an *SIGNED* integer.
APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
-
+
// True iff FpValBuf contains an instance of APFloat.
bool BufHasFpVal;
-
+
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from neighboring at most
// two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -102,7 +104,7 @@ namespace {
AlignedCharArrayUnion<APFloat> FpValBuf;
};
-
+
/// FAddend is used to represent floating-point addend. An addend is
/// represented as <C, V>, where the V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
@@ -110,10 +112,10 @@ namespace {
class FAddend {
public:
FAddend() { Val = 0; }
-
+
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
-
+
bool isConstant() const { return Val == 0; }
bool isZero() const { return Coeff.isZero(); }
@@ -122,17 +124,17 @@ namespace {
{ Coeff.set(Coefficient); Val = V; }
void set(const ConstantFP* Coefficient, Value *V)
{ Coeff.set(Coefficient->getValueAPF()); Val = V; }
-
+
void negate() { Coeff.negate(); }
-
+
/// Drill down the U-D chain one step to find the definition of V, and
/// try to break the definition into one or two addends.
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-
+
/// Similar to FAddend::drillDownOneStep() except that the value being
/// splitted is the addend itself.
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-
+
void operator+=(const FAddend &T) {
assert((Val == T.Val) && "Symbolic-values disagree");
Coeff += T.Coeff;
@@ -140,12 +142,12 @@ namespace {
private:
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-
+
// This addend has the value of "Coeff * Val".
Value *Val;
FAddendCoef Coeff;
};
-
+
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with its neighboring at most two instructions.
///
@@ -153,17 +155,17 @@ namespace {
public:
FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
Value *simplify(Instruction *FAdd);
-
+
private:
typedef SmallVector<const FAddend*, 4> AddendVect;
-
+
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
Value *performFactorization(Instruction *I);
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-
+
/// Return the number of instructions needed to emit the N-ary addition.
unsigned calcInstrNumber(const AddendVect& Vect);
Value *createFSub(Value *Opnd0, Value *Opnd1);
@@ -173,10 +175,10 @@ namespace {
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
-
+
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
-
+
private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
@@ -188,7 +190,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+}
//===----------------------------------------------------------------------===//
//
@@ -211,7 +213,7 @@ void FAddendCoef::set(const APFloat& C) {
} else
*P = C;
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
}
void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
@@ -225,7 +227,7 @@ void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
new(P) APFloat(Sem, 0 - IntVal);
P->changeSign();
}
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
}
APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
@@ -254,14 +256,14 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
getFpVal().add(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
convertToFpType(T.getSemantics());
getFpVal().add(T, RndMode);
return;
}
-
+
APFloat &T = getFpVal();
T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
@@ -275,7 +277,7 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
getFpVal().subtract(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
convertToFpType(T.getSemantics());
@@ -303,7 +305,7 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
return;
}
- const fltSemantics &Semantic =
+ const fltSemantics &Semantic =
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
@@ -338,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
// A - B <1, A>, <1,B>
// 0 - B <-1, B>
// C * A, <C, A>
-// A + C <1, A> <C, NULL>
+// A + C <1, A> <C, NULL>
// 0 +/- 0 <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
-//
+//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
Instruction *I = 0;
@@ -413,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
return 0;
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
- if (!BreakNum || Coeff.isOne())
+ if (!BreakNum || Coeff.isOne())
return BreakNum;
Addend0.Scale(Coeff);
@@ -435,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep
Value *FAddCombine::performFactorization(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-
+
Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-
+
if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
return 0;
@@ -453,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) {
Value *Opnd1_0 = I1->getOperand(0);
Value *Opnd1_1 = I1->getOperand(1);
- // Input Instr I Factor AddSub0 AddSub1
+ // Input Instr I Factor AddSub0 AddSub1
// ----------------------------------------------
// (x*y) +/- (x*z) x y z
// (y/x) +/- (z/x) x y z
//
Value *Factor = 0;
Value *AddSub0 = 0, *AddSub1 = 0;
-
+
if (isMpy) {
if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
Factor = Opnd0_0;
@@ -492,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (isMpy)
return createFMul(Factor, NewAddSub);
-
+
return createFDiv(NewAddSub, Factor);
}
@@ -506,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
- // Save the instruction before calling other member-functions.
+ // Save the instruction before calling other member-functions.
Instr = I;
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
unsigned Opnd0_ExpNum = 0;
unsigned Opnd1_ExpNum = 0;
- if (!Opnd0.isConstant())
+ if (!Opnd0.isConstant())
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -539,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -579,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- // step 6: Try factorization as the last resort,
+ // step 6: Try factorization as the last resort,
return performFactorization(I);
}
@@ -588,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
- // For saving intermediate results;
+ // For saving intermediate results;
unsigned NextTmpIdx = 0;
FAddend TmpResult[3];
@@ -604,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
AddendVect SimpVect;
// The outer loop works on one symbolic-value at a time. Suppose the input
- // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
// The symbolic-values will be processed in this order: x, y, z.
//
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -631,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = 0;
SimpVect.push_back(T);
}
}
@@ -644,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
R += *SimpVect[Idx];
// Pop all addends being folded and push the resulting folded addend.
- SimpVect.resize(StartIdx);
+ SimpVect.resize(StartIdx);
if (Val != 0) {
if (!R.isZero()) {
SimpVect.push_back(&R);
@@ -657,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -679,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
assert(!Opnds.empty() && "Expect at least one addend");
// Step 1: Check if the # of instructions needed exceeds the quota.
- //
+ //
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
return 0;
@@ -700,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
// Iterate the addends, creating fadd/fsub using adjacent two addends.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
I != E; I++) {
- bool NeedNeg;
+ bool NeedNeg;
Value *V = createAddendVal(**I, NeedNeg);
if (!LastVal) {
LastVal = V;
@@ -726,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
}
#ifndef NDEBUG
- assert(CreateInstrNum == InstrNeeded &&
+ assert(CreateInstrNum == InstrNeeded &&
"Inconsistent in instruction numbers");
#endif
@@ -784,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
// Adjust the number of instructions needed to emit the N-ary add.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -972,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
@@ -1230,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 990cbc3..ec75dd2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return 0;
}
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return 0;
+
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return Builder->CreateICmpULT(Add, AddCST);
- }
-
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
// (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd..78b4a2c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+ const AttributeSet &NewPAL =
+ AttributeSet::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a96e754..4c252c0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return 0;
-
+
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
@@ -2487,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, Y, Z);
}
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
+ }
+
+
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
A = 0; B = 0; C = 0; D = 0;
@@ -2620,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
{ Value *A, *B;
+ // Transform (A & ~B) == 0 --> (A & B) != 0
+ // and (A & ~B) != 0 --> (A & B) == 0
+ // if A is a power of 2.
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+ return new ICmpInst(I.getInversePredicate(),
+ Builder->CreateAnd(A, B),
+ Op1);
+
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
if (match(Op0, m_Not(m_Value(A)))) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe3..e2d7966 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
- IsOffset || !GEP->hasAllZeroIndices()))
+ if (!isOnlyCopiedFromConstantGlobal(
+ GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
return false;
continue;
}
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
+ SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come form identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// its only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// If the two stores had the same TBAA tag, preserve it.
if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
OtherStore->getMetadata(LLVMContext::MD_tbaa))))
NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
// Nuke the old stores.
EraseInstFromFunction(SI);
EraseInstFromFunction(*OtherStore);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 173f2bf..ecc9fc3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// if this is safe. For example, the use could be in dynamically unreached
// code.
if (!V->hasOneUse()) return 0;
-
+
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
-
+
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
I->setOperand(0, V2);
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
I->setIsExact();
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
I->setHasNoUnsignedWrap();
MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// TODO: Lots more we could do here:
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
-
+
return MadeChange ? V : 0;
}
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
LHSExt = LHSExt.zext(W * 2);
RHSExt = RHSExt.zext(W * 2);
}
-
+
APInt MulExt = LHSExt * RHSExt;
-
+
if (!sign)
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-
+
APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
+
// ((X << C1)*C2) == (X * (C2 << C1))
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
-
+
const APInt &Val = CI->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
return Shl;
}
-
+
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
}
-
+
// Simplify mul instructions with a constant RHS.
- if (isa<Constant>(Op1)) {
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *Op1C = Op1;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
+ (BO->getOpcode() != Instruction::UDiv &&
BO->getOpcode() != Instruction::SDiv)) {
Op1C = Op0;
BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_Shl(m_One(), m_Value(Y))))
return BinaryOperator::CreateShl(Op0, Y);
}
-
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-
+
Value *BoolCast = 0, *OtherOp = 0;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
return;
if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
return;
-
+
ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
if (CFP && CFP->isExactlyValue(0.5)) {
Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
CFP = dyn_cast<ConstantFP>(I->getOperand(1));
if (CFP && CFP->isExactlyValue(0.5))
Y = I->getOperand(0);
-}
+}
/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
/// true iff the given value is FMul or FDiv with one and only one operand
/// being a normal constant (i.e. not Zero/NaN/Infinity).
static bool isFMulOrFDivWithConstant(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || (I->getOpcode() != Instruction::FMul &&
+ if (!I || (I->getOpcode() != Instruction::FMul &&
I->getOpcode() != Instruction::FDiv))
return false;
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the
+/// This function is to simplify "FMulOrDiv * C" and returns the
/// resulting expression. Note that this function could return NULL in
/// case the constants cannot be folded into a normal floating-point.
-///
+///
Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
Instruction *InsertBefore) {
assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
if (isNormalFp(F)) {
R = BinaryOperator::CreateFMul(Opnd0, F);
} else {
- // (X / C1) * C => X / (C1/C)
+ // (X / C1) * C => X / (C1/C)
Constant *F = ConstantExpr::getFDiv(C1, C);
if (isNormalFp(cast<ConstantFP>(F)))
R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C0) {
std::swap(C0, C1);
std::swap(Opnd0, Opnd1);
- Swap = true;
+ Swap = true;
}
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
Value *M1 = ConstantExpr::getFMul(C1, C);
- Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
+ // The purpose is two-fold:
// 1) to form a power expression (of X).
// 2) potentially shorten the critical path: After transformation, the
// latency of the instruction Y is amortized by the expression of X*X,
@@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // B * (uitofp i1 C) -> select C, B, 0
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *B, *C;
+ if (!match(RHS, m_UIToFp(m_Value(C))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ B = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(B->getType());
+ return SelectInst::Create(C, B, Zero);
+ }
+ }
+
+ // A * (1 - uitofp i1 C) -> select C, 0, A
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *A, *C;
+ if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+ C->getType()->isIntegerTy(1)) {
+ A = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(A->getType());
+ return SelectInst::Create(C, Zero, A);
+ }
+ }
+
if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);
else
@@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-
+
// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
int NonNullOperand = -1;
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
if (ST->isNullValue())
NonNullOperand = 1;
-
+
if (NonNullOperand == -1)
return false;
-
+
Value *SelectCond = SI->getOperand(0);
-
+
// Change the div/rem to use 'Y' instead of the select.
I.setOperand(1, SI->getOperand(NonNullOperand));
-
+
// Okay, we know we replace the operand of the div/rem with 'Y' with no
// problem. However, the select, or the condition of the select may have
// multiple uses. Based on our knowledge that the operand must be non-zero,
// propagate the known value for the select into other uses of it, and
// propagate a known value of the condition into its other users.
-
+
// If the select and condition only have a single use, don't bother with this,
// early exit.
if (SI->use_empty() && SelectCond->hasOneUse())
return true;
-
+
// Scan the current block backward, looking for other uses of SI.
BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
+
while (BBI != BBFront) {
--BBI;
// If we found a call to a function, we can't assume it will return, so
// information from below it cannot be propagated above it.
if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
break;
-
+
// Replace uses of the select or its condition with the known values.
for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
I != E; ++I) {
@@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
Worklist.Add(BBI);
}
}
-
+
// If we past the instruction, quit looking for it.
if (&*BBI == SI)
SI = 0;
if (&*BBI == SelectCond)
SelectCond = 0;
-
+
// If we ran out of things to eliminate, break out of the loop.
if (SelectCond == 0 && SI == 0)
break;
-
+
}
return true;
}
@@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
I.setOperand(1, V);
return &I;
}
-
+
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
const APInt *C;
if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(),
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
@@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, N);
}
}
-
+
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
{ Value *Cond; const APInt *C1, *C2;
@@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Construct the "on true" case of the select
Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
I.isExact());
-
+
// Construct the "on false" case of the select
Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
I.isExact());
-
+
// construct the select instruction and return it.
return SelectInst::Create(Cond, TSI, FSI);
}
@@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
-
+
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
}
-
+
return 0;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
/// FP value and:
-/// 1) 1/C is exact, or
+/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
/// If the convertion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
@@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
const APFloat &FpVal = Divisor->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
-
+
if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
(void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Constant *C = ConstantExpr::getFMul(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
if (F.isNormal() && !F.isDenormal()) {
- Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
AllowReciprocal);
if (!Res)
- Res = BinaryOperator::CreateFDiv(X, C);
+ Res = BinaryOperator::CreateFDiv(X, C);
}
}
@@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Fold) {
const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
if (FoldC.isNormal() && !FoldC.isDenormal()) {
- Instruction *R = CreateDiv ?
+ Instruction *R = CreateDiv ?
BinaryOperator::CreateFDiv(Fold, X) :
BinaryOperator::CreateFMul(X, Fold);
R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
-
+
// X urem C^2 -> X and C-1
{ const APInt *C;
if (match(Op1, m_Power2(C)))
@@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
ConstantInt::get(I.getType(), *C-1));
}
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
@@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
-
+
if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<Constant>(RHSNeg) ||
(isa<ConstantInt>(RHSNeg) &&
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998c..bd14e81 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
unsigned Opc = FirstInst->getOpcode();
Value *LHSVal = FirstInst->getOperand(0);
Value *RHSVal = FirstInst->getOperand(1);
-
+
Type *LHSType = LHSVal->getType();
Type *RHSType = RHSVal->getType();
-
+
bool isNUW = false, isNSW = false, isExact = false;
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
} else if (PossiblyExactOperator *PEO =
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
if (isExact)
isExact = cast<PossiblyExactOperator>(I)->isExact();
-
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
return 0;
-
+
// Otherwise, this is safe to transform!
-
+
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewLHS, PN);
LHSVal = NewLHS;
}
-
+
if (RHSVal == 0) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewRHS, PN);
RHSVal = NewRHS;
}
-
+
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
}
-
+
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-
- SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
// This is true if all GEP bases are allocas and if all indices into them are
// constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// more than one phi, which leads to higher register pressure. This is
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
-
+
bool AllInBounds = true;
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
return 0;
AllInBounds &= GEP->isInBounds();
-
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
!GEP->hasAllConstantIndices()))
AllBasePointersAreAllocas = false;
-
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
continue;
-
+
// Don't merge two GEPs when two operands differ (introducing phi nodes)
// if one of the PHIs has a constant for the index. The index may be
// substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
return 0;
-
+
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
return 0;
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
NeededPhi = true;
}
}
-
+
// If all of the base pointers of the PHI'd GEPs are from allocas, don't
// bother doing this transformation. At best, this will just save a bit of
// offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
return 0;
-
+
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
+
bool HasAnyPHIs = false;
for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
if (FixedOperands[i]) continue; // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
FirstOp->getName()+".pn");
InsertNewInstBefore(NewPN, PN);
-
+
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[i] = NewPN;
FixedOperands[i] = NewPN;
HasAnyPHIs = true;
}
-
+
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
BasicBlock *InBB = PN.getIncomingBlock(i);
-
+
for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
if (PHINode *OpPhi = OperandPhis[op])
OpPhi->addIncoming(InGEP->getOperand(op), InBB);
}
}
-
+
Value *Base = FixedOperands[0];
- GetElementPtrInst *NewGEP =
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
+
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
isAddressTaken = true;
break;
}
-
+
if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
-
+
// If this load is a load from a GEP with a constant offset from an alloca,
// then we don't want to sink it. In its present form, it will be
// load [constant stack offset]. Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
return false;
-
+
return true;
}
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
bool isVolatile = FirstLI->isVolatile();
unsigned LoadAlignment = FirstLI->getAlignment();
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-
+
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
return 0;
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
+
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
return 0;
-
- // We can't sink the load if the loaded value could be modified between
+
+ // We can't sink the load if the loaded value could be modified between
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
return 0;
-
+
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
return 0;
-
+
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
}
-
+
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
PN.getNumIncomingValues(),
PN.getName()+".in");
-
+
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
+
// Add all operands to the new PHI.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InVal = 0;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
-
+
Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewPN, PN);
PhiVal = NewPN;
}
-
+
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-
+
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return FoldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
return FoldPHIArgLoadIntoPHI(PN);
-
+
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Constant *ConstantOp = 0;
Type *CastSrcTy = 0;
bool isNUW = false, isNSW = false, isExact = false;
-
+
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return 0;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
- // Can fold binop, compare or shift here if the RHS is a constant,
+ // Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
-
+
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
NewCI->setDebugLoc(FirstInst->getDebugLoc());
return NewCI;
}
-
+
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
BinOp->setDebugLoc(FirstInst->getDebugLoc());
return BinOp;
}
-
+
CmpInst *CIOp = cast<CmpInst>(FirstInst);
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
// Remember this node, and if we find the cycle, return.
if (!PotentiallyDeadPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (PotentiallyDeadPHIs.size() == 16)
return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
/// PHIsEqualValue - Return true if this phi node is always equal to
/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
// See if we already saw this PHI node.
if (!ValueEqualPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (ValueEqualPHIs.size() == 16)
return false;
-
+
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
} else if (Op != NonPhiInVal)
return false;
}
-
+
return true;
}
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
-
+
PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
: PHIId(pn), Shift(Sh), Inst(User) {}
-
+
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
RHS.Inst->getType()->getPrimitiveSizeInBits();
}
};
-
+
struct LoweredPHIRecord {
PHINode *PN; // The PHI that was lowered.
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
-
+
LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
: PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
+
// Ctor form used by DenseMap.
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHIUsers - Keep track of all of the truncated values extracted from a set
// of PHIs, along with their offset. These are the things we want to rewrite.
SmallVector<PHIUsageRecord, 16> PHIUsers;
-
+
// PHIs are often mutually cyclic, so we keep track of a whole set of PHI
// nodes which are extracted from. PHIsToSlice is a set we use to avoid
// revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
// check the uses of (to ensure they are all extracts).
SmallVector<PHINode*, 8> PHIsToSlice;
SmallPtrSet<PHINode*, 8> PHIsInspected;
-
+
PHIsToSlice.push_back(&FirstPhi);
PHIsInspected.insert(&FirstPhi);
-
+
for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
PHINode *PN = PHIsToSlice[PHIId];
-
+
// Scan the input list of the PHI. If any input is an invoke, and if the
// input is defined in the predecessor, then we won't be split the critical
// edge which is required to insert a truncate. Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (II == 0) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
-
+
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
return 0;
}
-
-
+
+
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
// If the user is a PHI, inspect its uses recursively.
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (PHIsInspected.insert(UserPN))
PHIsToSlice.push_back(UserPN);
continue;
}
-
+
// Truncates are always ok.
if (isa<TruncInst>(User)) {
PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
continue;
}
-
+
// Otherwise it must be a lshr which can only be used by one trunc.
if (User->getOpcode() != Instruction::LShr ||
!User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
!isa<ConstantInt>(User->getOperand(1)))
return 0;
-
+
unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
}
}
-
+
// If we have no users, they must be all self uses, just nuke the PHI.
if (PHIUsers.empty())
return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
+
// If this phi node is transformable, create new PHIs for all the pieces
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
+
DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
);
-
+
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
DenseMap<BasicBlock*, Value*> PredValues;
-
+
// ExtractedVals - Each new PHI we introduce is saved here so we don't
// introduce redundant PHIs.
DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
+
for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
unsigned PHIId = PHIUsers[UserI].PHIId;
PHINode *PN = PHIsToSlice[PHIId];
unsigned Offset = PHIUsers[UserI].Shift;
Type *Ty = PHIUsers[UserI].Inst->getType();
-
+
PHINode *EltPHI;
-
+
// If we've already lowered a user like this, reuse the previously lowered
// value.
if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
+
// Otherwise, Create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
PN->getName()+".off"+Twine(Offset), PN);
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
Value *&PredVal = PredValues[Pred];
-
+
// If we already have a value for this predecessor, reuse it.
if (PredVal) {
EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
EltPHI->addIncoming(PredVal, Pred);
continue;
}
-
+
if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
// If the incoming value was a PHI, and if it was one of the PHIs we
// already rewrote it, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
continue;
}
}
-
+
// Otherwise, do an extract in the predecessor.
Builder->SetInsertPoint(Pred, Pred->getTerminator());
Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Res = Builder->CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
-
+
// If the incoming value was a PHI, and if it was one of the PHIs we are
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
cast<Instruction>(Res)));
++UserE;
}
}
PredValues.clear();
-
+
DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
-
+
// Replace the use of this piece with the PHI node.
ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
}
-
+
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with undefs.
Value *Undef = UndefValue::get(FirstPhi.getType());
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (DeadPHICycle(PU, PotentiallyDeadPHIs))
return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
-
+
// If this phi has a single use, and if that use just computes a value for
// the next iteration of a loop, delete the phi. This occurs with unused
// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (InValNo != NumIncomingVals) {
Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-
+
// Scan the rest of the operands to see if there are any conflicts, if so
// there is no need to recursively scan other phis.
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
}
-
+
// If we scanned over all operands, then we have one unique value plus
// phi values. Scan PHI nodes to see if they all merge in each other or
// the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
-
+
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 121aa1f..59502fb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -350,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return 0;
}
+/// foldSelectICmpAndOr - We want to turn:
+/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+/// (or (shl (and X, C1), C3), y)
+/// iff:
+/// C1 and C2 are both powers of 2
+/// where:
+/// C3 = Log(C2) - Log(C1)
+///
+/// This transform handles cases where:
+/// 1. The icmp predicate is inverted
+/// 2. The select operands are reversed
+/// 3. The magnitude of C2 and C1 are flipped
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+
+ if (!match(CmpRHS, m_Zero()))
+ return 0;
+
+ Value *X;
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ return 0;
+
+ const APInt *C2;
+ bool OrOnTrueVal = false;
+ bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+ if (!OrOnFalseVal)
+ OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+ if (!OrOnFalseVal && !OrOnTrueVal)
+ return 0;
+
+ Value *V = CmpLHS;
+ Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+
+ unsigned C1Log = C1->logBase2();
+ unsigned C2Log = C2->logBase2();
+ if (C2Log > C1Log) {
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder->CreateShl(V, C2Log - C1Log);
+ } else if (C1Log > C2Log) {
+ V = Builder->CreateLShr(V, C1Log - C2Log);
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ } else
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ V = Builder->CreateXor(V, *C2);
+
+ return Builder->CreateOr(V, Y);
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -521,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : 0;
}
@@ -676,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return BinaryOperator::CreateAnd(NotCond, FalseVal);
- } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -690,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select a, a, b -> a|b
if (CondVal == TrueVal)
return BinaryOperator::CreateOr(CondVal, FalseVal);
- else if (CondVal == FalseVal)
+ if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
// select a, ~a, b -> (~a)&b
// select a, b, ~a -> (~a)|b
if (match(TrueVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateAnd(TrueVal, FalseVal);
- else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ if (match(FalseVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
@@ -838,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NewFalseOp = NegVal;
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
- Value *NewSel =
+ Value *NewSel =
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
@@ -862,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *LHS, *RHS, *LHS2, *RHS2;
if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
@@ -908,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -918,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
- // Form a shufflevector instruction.
- SmallVector<Constant *, 8> Mask(VWidth);
- Type *Int32Ty = Type::getInt32Ty(CV->getContext());
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elem = cast<Constant>(CV->getOperand(i));
- if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
- Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
- else if (isa<UndefValue>(Elem))
- Mask[i] = UndefValue::get(Int32Ty);
- else
- return 0;
- }
- Constant *MaskVal = ConstantVector::get(Mask);
- Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
- return ReplaceInstUsesWith(SI, V);
- }
-
if (isa<ConstantAggregateZero>(CondVal)) {
return ReplaceInstUsesWith(SI, FalseVal);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1..4301ddb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return 0;
}
+// If we have a PHI node with a vector type that has only 2 uses: feed
+// itself and be an operand of extractelemnt at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+ // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+ if (!PN->hasNUses(2))
+ return NULL;
+
+ // If so, it's known at this point that one operand is PHI and the other is
+ // an extractelement node. Find the PHI user that is not the extractelement
+ // node.
+ Value::use_iterator iu = PN->use_begin();
+ Instruction *PHIUser = dyn_cast<Instruction>(*iu);
+ if (PHIUser == cast<Instruction>(&EI))
+ PHIUser = cast<Instruction>(*(++iu));
+
+ // Verify that this PHI user has one use, which is the PHI itself,
+ // and that it is a binary operation which is cheap to scalarize.
+ // otherwise return NULL.
+ if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !CheapToScalarize(PHIUser, true))
+ return NULL;
+
+ // Create a scalar PHI node that will replace the vector PHI node
+ // just before the current PHI node.
+ PHINode * scalarPHI = cast<PHINode>(
+ InsertNewInstWith(PHINode::Create(EI.getType(),
+ PN->getNumIncomingValues(), ""), *PN));
+ // Scalarize each PHI operand.
+ for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+ Value *PHIInVal = PN->getIncomingValue(i);
+ BasicBlock *inBB = PN->getIncomingBlock(i);
+ Value *Elt = EI.getIndexOperand();
+ // If the operand is the PHI induction variable:
+ if (PHIInVal == PHIUser) {
+ // Scalarize the binary operation. Its first operand is the
+ // scalar PHI and the second operand is extracted from the other
+ // vector operand.
+ BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+ unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
+ Value *Op = Builder->CreateExtractElement(
+ B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ Value *newPHIUser = InsertNewInstWith(
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
+ *B0);
+ scalarPHI->addIncoming(newPHIUser, inBB);
+ } else {
+ // Scalarize PHI input:
+ Instruction *newEI =
+ ExtractElementInst::Create(PHIInVal, Elt, "");
+ // Insert the new instruction into the predecessor basic block.
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+ BasicBlock::iterator InsertPos;
+ if (pos && !isa<PHINode>(pos)) {
+ InsertPos = pos;
+ ++InsertPos;
+ } else {
+ InsertPos = inBB->getFirstInsertionPt();
+ }
+
+ InsertNewInstWith(newEI, *InsertPos);
+
+ scalarPHI->addIncoming(newEI, inBB);
+ }
+ }
+ return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
+
+ // If there's a vector PHI feeding a scalar use through this extractelement
+ // instruction, try to scalarize the PHI.
+ if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+ return (scalarPHI);
+ }
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement)
// bitcasts can change the number of vector elements and they cost nothing
- if (CI->hasOneUse() && EI.hasOneUse() &&
- (CI->getOpcode() != Instruction::BitCast)) {
+ if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
+ Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
}
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Update Mask to reflect that `ScalarOp' has been inserted at
+ // position `InsertedIdx' within the vector returned by IEI.
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3..ec10751 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
Module *M = II->getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
- ArrayRef<Value *>(), "", II->getParent());
+ None, "", II->getParent());
}
return EraseInstFromFunction(MI);
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
index 927982d..39de4b0 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
@@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
bool BlackList::isInInit(const GlobalVariable &G) const {
return (isIn(*G.getParent()) ||
inSection("global-init", G.getName()) ||
- inSection("global-init-type", GetGVTypeString(G)));
+ inSection("global-init-type", GetGVTypeString(G)) ||
+ inSection("global-init-src", G.getParent()->getModuleIdentifier()));
}
bool BlackList::inSection(const StringRef Section,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8ba1025..9f35396 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 53a31b0..373168e 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -30,6 +30,7 @@ using namespace llvm::objcarc;
bool llvm::objcarc::EnableARCOpts;
static cl::opt<bool, true>
EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
cl::location(EnableARCOpts),
cl::init(true));
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index b96c64f..c43f4f4 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -66,6 +66,8 @@ namespace {
Constant *RetainAutoreleaseCallee;
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRVCallee;
+ /// Declaration for objc_retainAutoreleasedReturnValue().
+ Constant *RetainRVCallee;
/// The inline asm string to insert between calls and RetainRV calls to make
/// the optimization work on targets which need it.
@@ -77,9 +79,12 @@ namespace {
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainRVCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
Constant *getRetainAutoreleaseRVCallee(Module *M);
+ bool OptimizeRetainCall(Function &F, Instruction *Retain);
+
bool ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
SmallPtrSet<Instruction *, 4>
@@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
return RetainAutoreleaseRVCallee;
}
+Constant *ObjCARCContract::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value. We do this late so we do not disrupt the dataflow analysis in
+/// ObjCARCOpt.
+bool
+ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call)
+ return false;
+ if (Call->getParent() != Retain->getParent())
+ return false;
+
+ // Check that the call is next to the retain.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return false;
+
+ // Turn it to an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Transforming objc_retain => "
+ "objc_retainAutoreleasedReturnValue since the operand is a "
+ "return value.\nOld: "<< *Retain << "\n");
+
+ // We do not have to worry about tail calls/does not throw since
+ // retain/retainRV have the same properties.
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << "New: " << *Retain << "\n");
+ return true;
+}
+
/// Merge an autorelease with a retain into a fused call.
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
@@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
StoreStrongCallee = 0;
RetainAutoreleaseCallee = 0;
RetainAutoreleaseRVCallee = 0;
+ RetainRVCallee = 0;
// Initialize RetainRVMarker.
RetainRVMarker = 0;
@@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// objc_retainBlock does not necessarily return its argument.
InstructionClass Class = GetBasicInstructionClass(Inst);
switch (Class) {
- case IC_Retain:
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
break;
@@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
continue;
break;
+ case IC_Retain:
+ // Attempt to convert retains to retainrvs if they are next to function
+ // calls.
+ if (!OptimizeRetainCall(F, Inst))
+ break;
+ // If we succeed in our optimization, fall through.
+ // FALLTHROUGH
case IC_RetainRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the retainAutoreleasedReturnValue optimization,
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 92d6fc4..43e2e20 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -191,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
do {
const Value *V = Worklist.pop_back_val();
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+ DEBUG(dbgs() << "Visiting: " << *V << "\n");
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const User *UUser = *UI;
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+ DEBUG(dbgs() << "User: " << *UUser << "\n");
// Special - Use by a call (callee or argument) is not considered
// to be an escape.
@@ -207,8 +207,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
case IC_StoreStrong:
case IC_Autorelease:
case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
- "arguments. Pointer Escapes!\n");
+ DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
// These special functions make copies of their pointer arguments.
return true;
}
@@ -223,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
- "Ptr escapes if result escapes. Adding to list.\n");
+ DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
+ " Adding to list.\n");
Worklist.push_back(UUser);
} else {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
- "\n");
+ DEBUG(dbgs() << "Already visited node.\n");
}
continue;
}
@@ -245,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
continue;
}
// Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ DEBUG(dbgs() << "Assuming ptr escapes.\n");
return true;
}
} while (!Worklist.empty());
// No escapes found.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ DEBUG(dbgs() << "Ptr does not escape.\n");
return false;
}
@@ -305,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding "
"retain+autoreleaes eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumRetainsBeforeOpt,
+ "Number of retains before optimization.");
+STATISTIC(NumReleasesBeforeOpt,
+ "Number of releases before optimization.");
+#ifndef NDEBUG
+STATISTIC(NumRetainsAfterOpt,
+ "Number of retains after optimization.");
+STATISTIC(NumReleasesAfterOpt,
+ "Number of releases after optimization.");
+#endif
namespace {
/// \enum Sequence
@@ -375,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
namespace {
/// \brief Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverese sequence.
+ /// reverse sequence.
struct RRInfo {
/// After an objc_retain, the reference count of the referenced
/// object is known to be positive. Similarly, before an objc_release, the
@@ -410,6 +418,10 @@ namespace {
KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
void clear();
+
+ bool IsTrackingImpreciseReleases() {
+ return ReleaseMetadata != 0;
+ }
};
}
@@ -428,7 +440,7 @@ namespace {
/// True if the reference count is known to be incremented.
bool KnownPositiveRefCount;
- /// True of we've seen an opportunity for partial RR elimination, such as
+ /// True if we've seen an opportunity for partial RR elimination, such as
/// pushing calls into a CFG triangle or into one side of a CFG diamond.
bool Partial;
@@ -457,6 +469,7 @@ namespace {
}
void SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
Seq = NewSeq;
}
@@ -469,7 +482,8 @@ namespace {
}
void ResetSequenceProgress(Sequence NewSeq) {
- Seq = NewSeq;
+ DEBUG(dbgs() << "Resetting sequence progress.\n");
+ SetSeq(NewSeq);
Partial = false;
RRI.clear();
}
@@ -706,7 +720,19 @@ void BBState::MergeSucc(const BBState &Other) {
/// Enable/disable ARC sequence annotations.
static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
+ cl::desc("Enable emission of arc data flow analysis "
+ "annotations"));
+static cl::opt<bool>
+DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
+ cl::desc("Disable check for cfg hazards when "
+ "annotating"));
+static cl::opt<std::string>
+ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
+ cl::init(""),
+ cl::desc("filter out all data flow annotations "
+ "but those that apply to the given "
+ "target llvm identifier."));
/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
/// instruction so that we can track backwards when post processing via the llvm
@@ -791,6 +817,12 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
/// state of a pointer at the entrance to a basic block.
static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before
+ // continuing.
+ if(!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
Module *M = BB->getParent()->getParent();
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -828,6 +860,12 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
/// of the pointer at the bottom of the basic block.
static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before emitting
+ // an annotation.
+ if(!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
Module *M = BB->getParent()->getParent();
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -869,6 +907,12 @@ static void GenerateARCAnnotation(unsigned InstMDId,
Sequence OldSeq,
Sequence NewSeq) {
if (EnableARCAnnotations) {
+ // If we have a target identifier, make sure that we match it before
+ // emitting an annotation.
+ if(!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
// First generate the source annotation on our pointer. This will return an
// MDString* if Ptr actually comes from an instruction implying we can put
// in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
@@ -909,27 +953,27 @@ static void GenerateARCAnnotation(unsigned InstMDId,
#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
do { \
- if (EnableARCAnnotations) { \
- for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+ if (EnableARCAnnotations) { \
+ for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
E = (_states)._direction##_ptr_end(); I != E; ++I) { \
- Value *Ptr = const_cast<Value*>(I->first); \
- Sequence Seq = I->second.GetSeq(); \
- GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ Value *Ptr = const_cast<Value*>(I->first); \
+ Sequence Seq = I->second.GetSeq(); \
+ GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ } \
} \
- } \
-} while (0)
+ } while (0)
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
Terminator, top_down)
#else // !ARC_ANNOTATION
@@ -955,9 +999,6 @@ namespace {
/// them. These are initialized lazily to avoid cluttering up the Module
/// with unused declarations.
- /// Declaration for ObjC runtime function
- /// objc_retainAutoreleasedReturnValue.
- Constant *RetainRVCallee;
/// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
Constant *AutoreleaseRVCallee;
/// Declaration for ObjC runtime function objc_release.
@@ -991,7 +1032,6 @@ namespace {
unsigned ARCAnnotationProvenanceSourceMDKind;
#endif // ARC_ANNOATIONS
- Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
@@ -1000,7 +1040,6 @@ namespace {
bool IsRetainBlockOptimizable(const Instruction *Inst);
- void OptimizeRetainCall(Function &F, Instruction *Retain);
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
@@ -1059,6 +1098,10 @@ namespace {
void OptimizeReturns(Function &F);
+#ifndef NDEBUG
+ void GatherStatistics(Function &F, bool AfterOptimization = false);
+#endif
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
@@ -1106,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
return true;
}
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
- if (!RetainRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
- Attribute);
- }
- return RetainRVCallee;
-}
-
Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
if (!AutoreleaseRVCallee) {
LLVMContext &C = M->getContext();
@@ -1201,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
-/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
-/// return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
- const Instruction *Call = CS.getInstruction();
- if (!Call) return;
- if (Call->getParent() != Retain->getParent()) return;
-
- // Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (IsNoopInstruction(I)) ++I;
- if (&*I != Retain)
- return;
-
- // Turn it to an objc_retainAutoreleasedReturnValue..
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
- "objc_retain => objc_retainAutoreleasedReturnValue"
- " since the operand is a return value.\n"
- " Old: "
- << *Retain << "\n");
-
- cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-
- DEBUG(dbgs() << " New: "
- << *Retain << "\n");
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1269,9 +1264,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
- << " Erasing " << *RetainRV
- << "\n");
+ DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
+ << "Erasing " << *RetainRV << "\n");
EraseInstruction(I);
EraseInstruction(RetainRV);
@@ -1283,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
- "objc_retainAutoreleasedReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
"objc_retain since the operand is not a return value.\n"
- " Old: "
- << *RetainRV << "\n");
+ "Old = " << *RetainRV << "\n");
cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
- DEBUG(dbgs() << " New: "
- << *RetainRV << "\n");
+ DEBUG(dbgs() << "New = " << *RetainRV << "\n");
return false;
}
@@ -1321,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
- "objc_autoreleaseReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => "
"objc_autorelease since its operand is not used as a return "
"value.\n"
- " Old: "
- << *AutoreleaseRV << "\n");
+ "Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
AutoreleaseRVCI->
@@ -1334,8 +1323,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = IC_Autorelease;
- DEBUG(dbgs() << " New: "
- << *AutoreleaseRV << "\n");
+ DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
}
@@ -1359,18 +1347,24 @@ ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
if (!IsRetainBlockOptimizable(Inst))
return false;
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
+ DEBUG(dbgs() << "Old: " << *Inst << "\n");
CallInst *RetainBlock = cast<CallInst>(Inst);
RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
// Remove copy_on_escape metadata.
RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
Class = IC_Retain;
-
+ DEBUG(dbgs() << "New: " << *Inst << "\n");
return true;
}
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
// Reset all the flags in preparation for recomputing them.
UsedInThisFunction = 0;
@@ -1380,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
InstructionClass Class = GetBasicInstructionClass(Inst);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
- << Class << "; " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
switch (Class) {
default: break;
@@ -1397,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_NoopCast:
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
- " " << *Inst << "\n");
+ DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
EraseInstruction(Inst);
continue;
@@ -1416,11 +1408,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Constant::getNullValue(Ty),
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
continue;
@@ -1439,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
@@ -1452,13 +1438,13 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
break;
}
case IC_RetainBlock:
- // If we strength reduce an objc_retainBlock to amn objc_retain, continue
+ // If we strength reduce an objc_retainBlock to an objc_retain, continue
// onto the objc_retain peephole optimizations. Otherwise break.
if (!OptimizeRetainBlockCall(F, Inst, Class))
break;
// FALLTHROUGH
case IC_Retain:
- OptimizeRetainCall(F, Inst);
+ ++NumRetainsBeforeOpt;
break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
@@ -1467,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
+ case IC_Release:
+ ++NumReleasesBeforeOpt;
+ break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1483,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *NewCall =
CallInst::Create(getReleaseCallee(F.getParent()),
Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>()));
+ NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
- "objc_autorelease(x) with objc_release(x) since x is "
- "otherwise unused.\n"
- " Old: " << *Call <<
- "\n New: " <<
- *NewCall << "\n");
+ DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+ "since x is otherwise unused.\nOld: " << *Call << "\nNew: "
+ << *NewCall << "\n");
EraseInstruction(Call);
Inst = NewCall;
@@ -1503,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// a tail keyword.
if (IsAlwaysTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
- " to function since it can never be passed stack args: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Adding tail keyword to function since it can never be "
+ "passed stack args: " << *Inst << "\n");
cast<CallInst>(Inst)->setTailCall();
}
@@ -1513,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// semantics of ARC truly do not do so.
if (IsNeverTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
- "keyword from function: " << *Inst <<
+ DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst <<
"\n");
cast<CallInst>(Inst)->setTailCall(false);
}
@@ -1522,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Set nounwind as needed.
if (IsNoThrow(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
- " class. Setting nounwind on: " << *Inst << "\n");
+ DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+ << "\n");
cast<CallInst>(Inst)->setDoesNotThrow();
}
@@ -1538,8 +1521,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
if (IsNullOrUndef(Arg)) {
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
- " null are no-ops. Erasing: " << *Inst << "\n");
+ DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
+ << "\n");
EraseInstruction(Inst);
continue;
}
@@ -1633,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Clone->setArgOperand(0, Op);
Clone->insertBefore(InsertPos);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ DEBUG(dbgs() << "Cloning "
<< *CInst << "\n"
- " And inserting "
- "clone at " << *InsertPos << "\n");
+ "And inserting clone at " << *InsertPos << "\n");
Worklist.push_back(std::make_pair(Clone, Incoming));
}
}
@@ -1648,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
} while (!Worklist.empty());
}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// If we have a top down pointer in the S_Use state, make sure that there are
+/// no CFG hazards by checking the states of various bottom up pointers.
+static void CheckForUseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame,
+ bool &ShouldContinue) {
+ switch (SuccSSeq) {
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ ShouldContinue = true;
+ break;
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
+}
+
+/// If we have a Top Down pointer in the S_CanRelease state, make sure that
+/// there are no CFG hazards by checking the states of various bottom up
+/// pointers.
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame) {
+ switch (SuccSSeq) {
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
}
/// Check for critical edges, loop boundaries, irreducible control flow, or
@@ -1661,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I)
- switch (I->second.GetSeq()) {
- default: break;
- case S_Use: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
+ E = MyStates.top_down_ptr_end(); I != E; ++I) {
+ PtrState &S = I->second;
+ const Sequence Seq = I->second.GetSeq();
+
+ // We only care about S_Retain, S_CanRelease, and S_Use.
+ if (Seq == S_None)
+ continue;
+
+ // Make sure that if extra top down states are added in the future that this
+ // code is updated to handle it.
+ assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
+ "Unknown top down sequence state.");
+
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ const DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const Sequence SuccSSeq = SuccS.GetSeq();
+
+ // If bottom up, the pointer is in an S_None state, clear the sequence
+ // progress since the sequence in the bottom up state finished
+ // suggesting a mismatch in between retains/releases. This is true for
+ // all three cases that we are handling here: S_Retain, S_Use, and
+ // S_CanRelease.
+ if (SuccSSeq == S_None) {
S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
+ continue;
+ }
+
+ // If we have S_Use or S_CanRelease, perform our check for cfg hazard
+ // checks.
+ const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+
+ // *NOTE* We do not use Seq from above here since we are allowing for
+ // S.GetSeq() to change while we are visiting basic blocks.
+ switch(S.GetSeq()) {
+ case S_Use: {
+ bool ShouldContinue = false;
+ CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+ SomeSuccHasSame, AllSuccsHaveSame,
+ ShouldContinue);
+ if (ShouldContinue)
continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ break;
+ }
+ case S_CanRelease: {
+ CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
+ S, SomeSuccHasSame,
+ AllSuccsHaveSame);
+ break;
+ }
+ case S_Retain:
+ case S_None:
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ break;
}
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
}
+
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ }
}
bool
@@ -1772,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
InstructionClass Class = GetInstructionClass(Inst);
const Value *Arg = 0;
+ DEBUG(dbgs() << "Class: " << Class << "\n");
+
switch (Class) {
case IC_Release: {
Arg = GetObjCArg(Inst);
@@ -1786,8 +1804,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
- "releases (i.e. a release pair)\n");
+ DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
NestingDetected = true;
}
@@ -1820,7 +1837,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Release:
case S_MovableRelease:
case S_Use:
- S.RRI.ReverseInsertPts.clear();
+ // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+ // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_CanRelease:
// Don't do retain+release tracking for IC_RetainRV, because it's
@@ -1835,7 +1855,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
llvm_unreachable("bottom-up pointer in retain state!");
}
ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
- return NestingDetected;
+ // A retain moving bottom up can be a use.
+ break;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -1861,6 +1882,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Use:
@@ -1883,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
assert(S.RRI.ReverseInsertPts.empty());
// If this is an invoke instruction, we're scanning it as part of
// one of its successor blocks, since we can't insert code after it
@@ -1894,6 +1919,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
@@ -1907,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
break;
case S_Stop:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
}
@@ -1927,6 +1956,9 @@ bool
ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains) {
+
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
+
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -1960,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -2033,13 +2065,18 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
PtrState &S = MyStates.getPtrTopDownState(Arg);
S.ClearKnownPositiveRefCount();
- switch (S.GetSeq()) {
+ Sequence OldSeq = S.GetSeq();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+
+ switch (OldSeq) {
case S_Retain:
case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
+ if (OldSeq == S_Retain || ReleaseMetadata != 0)
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
Releases[Inst] = S.RRI;
ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
@@ -2078,6 +2115,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Retain:
@@ -2105,6 +2144,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (Seq) {
case S_CanRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
}
@@ -2127,6 +2168,7 @@ bool
ObjCARCOpt::VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -2156,7 +2198,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
@@ -2165,6 +2207,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// bottom of the basic block.
ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+#ifdef ARC_ANNOTATIONS
+ if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
+#endif
CheckForCFGHazards(BB, BBStates, MyStates);
return NestingDetected;
}
@@ -2296,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Type *ArgTy = Arg->getType();
Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+ DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
+
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = ReleasesToMove.ReverseInsertPts.begin(),
@@ -2308,10 +2355,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Call->setDoesNotThrow();
Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
@@ -2328,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
// Delete the original retain and release calls.
@@ -2341,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRetain = *AI;
Retains.blot(OrigRetain);
DeadInsts.push_back(OrigRetain);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
- "\n");
+ DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
AI = ReleasesToMove.Calls.begin(),
@@ -2350,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRelease = *AI;
Releases.erase(OrigRelease);
DeadInsts.push_back(OrigRelease);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
- << "\n");
+ DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
}
+
}
bool
@@ -2506,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested.
+ if (EnableARCAnnotations)
+ return false;
+#endif // ARC_ANNOTATIONS
+
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2524,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
Module *M) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
+
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2539,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Instruction *Retain = cast<Instruction>(V);
- DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
- << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
Value *Arg = GetObjCArg(Retain);
@@ -2567,12 +2616,6 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
ReleasesToMove, Arg, KnownSafe,
AnyPairsCompletelyEliminated);
-#ifdef ARC_ANNOTATIONS
- // Do not move calls if ARC annotations are requested. If we were to move
- // calls in this case, we would not be able
- PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
-#endif // ARC_ANNOTATIONS
-
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
// code!
@@ -2597,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
/// Weak pointer optimizations.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
+
// First, do memdep-style RLE and S2L optimizations. We can't use memdep
// itself because it uses AliasAnalysis and we need to do provenance
// queries instead.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
InstructionClass Class = GetBasicInstructionClass(Inst);
if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
@@ -2752,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
done:;
}
}
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
}
/// Identify program paths which execute sequences of retains and releases which
@@ -2820,17 +2861,17 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
BB, Autorelease, DepInsts, Visited, PA);
if (DepInsts.size() != 1)
return 0;
-
+
CallInst *Retain =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
-
+
// Check that we found a retain with the same argument.
if (!Retain ||
!IsRetain(GetBasicInstructionClass(Retain)) ||
GetObjCArg(Retain) != Arg) {
return 0;
}
-
+
return Retain;
}
@@ -2847,7 +2888,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
BB, Ret, DepInsts, V, PA);
if (DepInsts.size() != 1)
return 0;
-
+
CallInst *Autorelease =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
@@ -2857,7 +2898,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
return 0;
if (GetObjCArg(Autorelease) != Arg)
return 0;
-
+
return Autorelease;
}
@@ -2873,60 +2914,87 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!F.getReturnType()->isPointerTy())
return;
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
+
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
BasicBlock *BB = FI;
ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
if (!Ret)
continue;
-
+
const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
-
- // Look for an ``autorelease'' instruction that is a predecssor of Ret and
+
+ // Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
// that need a positive ref count in between the autorelease and Ret.
CallInst *Autorelease =
FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
DependingInstructions, Visited,
PA);
- if (Autorelease) {
- DependingInstructions.clear();
- Visited.clear();
-
- CallInst *Retain =
- FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
- DependingInstructions, Visited, PA);
- if (Retain) {
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference count
- // between the retain and the call. Note that Retain need not be in BB.
- if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
- Visited, PA)) {
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
- << "\n Erasing: "
- << *Autorelease << "\n");
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
-
DependingInstructions.clear();
Visited.clear();
+
+ if (!Autorelease)
+ continue;
+
+ CallInst *Retain =
+ FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+ DependingInstructions, Visited, PA);
+ DependingInstructions.clear();
+ Visited.clear();
+
+ if (!Retain)
+ continue;
+
+ // Check that there is nothing that can affect the reference count
+ // between the retain and the call. Note that Retain need not be in BB.
+ bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
+ DependingInstructions,
+ Visited, PA);
+ DependingInstructions.clear();
+ Visited.clear();
+
+ if (!HasSafePathToCall)
+ continue;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: "
+ << *Autorelease << "\n");
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
}
+}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+#ifndef NDEBUG
+void
+ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
+ llvm::Statistic &NumRetains =
+ AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt;
+ llvm::Statistic &NumReleases =
+ AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ default:
+ break;
+ case IC_Retain:
+ ++NumRetains;
+ break;
+ case IC_Release:
+ ++NumReleases;
+ break;
+ }
+ }
}
+#endif
bool ObjCARCOpt::doInitialization(Module &M) {
if (!EnableARCOpts)
@@ -2958,7 +3026,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// calls finalizers which can have arbitrary side effects.
// These are initialized lazily.
- RetainRVCallee = 0;
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
@@ -2978,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
Changed = false;
- DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+ DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
+ "\n");
PA.setAA(&getAnalysis<AliasAnalysis>());
@@ -2986,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
- // Preliminary optimizations. This also computs UsedInThisFunction.
+ // Preliminary optimizations. This also computes UsedInThisFunction.
OptimizeIndividualCalls(F);
// Optimizations for weak pointers.
@@ -3013,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
(1 << IC_AutoreleaseRV)))
OptimizeReturns(F);
+ // Gather statistics after optimization.
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, true);
+ }
+#endif
+
DEBUG(dbgs() << "\n");
return Changed;
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e..f0d29c8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!DefIsLiveOut)
return false;
- // Make sure non of the uses are PHI nodes.
+ // Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 129af8d..f350b9b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
class GVN : public FunctionPass {
bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+ // Helper fuctions of redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate fully redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
index 5d02c68..4796eb2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0) return;
-
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (const GlobalVariable *G =
dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 7ee4027..a3c241d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -143,13 +143,9 @@ namespace {
// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
// than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
// "z" in the order of X-Y-Z is better than any other orders.
- class PtrSortFunctor {
- ArrayRef<XorOpnd> A;
-
- public:
- PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
- bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
- return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+ struct PtrSortFunctor {
+ bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
}
};
private:
@@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (X != Opnd2->getSymbolicPart())
return false;
- const APInt &C1 = Opnd1->getConstPart();
- const APInt &C2 = Opnd2->getConstPart();
-
// This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
int DeadInstNum = 1;
if (Opnd1->getValue()->hasOneUse())
@@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (Opnd2->isOrExpr())
std::swap(Opnd1, Opnd2);
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3((~C1) ^ C2);
// Do not increase code size!
@@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
// Do not increase code size
@@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
Res = createAndInstr(I, X, C3);
}
@@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return 0;
SmallVector<XorOpnd, 8> Opnds;
- SmallVector<unsigned, 8> OpndIndices;
+ SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
@@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I,
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
- OpndIndices.push_back(Opnds.size() - 1);
} else
ConstOpnd ^= cast<ConstantInt>(V)->getValue();
}
+ // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
+ // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
+ // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
+ // with the previous loop --- the iterator of the "Opnds" may be invalidated
+ // when new elements are added to the vector.
+ for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+ OpndPtrs.push_back(&Opnds[i]);
+
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::sort(OpndIndices.begin(), OpndIndices.end(),
- XorOpnd::PtrSortFunctor(Opnds));
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = 0;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
- XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+ XorOpnd *CurrOpnd = OpndPtrs[i];
// The combined value
Value *CV;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index f6bb365..d073e78 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".expand");
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
- Name + "insert");
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+ V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+ DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
@@ -2671,6 +2669,7 @@ private:
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset &&
canConvertValue(TD, V->getType(), NewAllocaTy)) {
V = convertValue(TD, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -3050,16 +3049,16 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
-
- // Find the operand we need to rewrite here.
- bool IsTrueVal = SI.getTrueValue() == OldPtr;
- if (IsTrueVal)
- assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
- else
- assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+ assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+ "Pointer isn't an operand!");
Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
- SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ // Replace the operands which were using the old pointer.
+ if (SI.getOperand(1) == OldPtr)
+ SI.setOperand(1, NewPtr);
+ if (SI.getOperand(2) == OldPtr)
+ SI.setOperand(2, NewPtr);
+
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a37..bfde334 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal) {
- // Non constant GEPs are only a problem on arrays, structs, and pointers
- // Vectors can be dynamically indexed.
- // FIXME: Add support for dynamic indexing on arrays. This should be
- // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
- // isn't.
- if (!(*GEPIt)->isVectorTy())
- return MarkUnsafe(Info, GEPI);
- NonConstant = true;
- NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
- }
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 63d7a1d..be8d39e 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
- // Clone any attributes.
- if (NewFunc->arg_size() == OldFunc->arg_size())
- NewFunc->copyAttributesFrom(OldFunc);
- else {
- //Some arguments were deleted with the VMap. Copy arguments one by one
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
- AttributeSet attrs = OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1);
- if (attrs.getNumSlots() > 0)
- Anew->addAttr(attrs);
- }
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldFunc->getAttributes()));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldFunc->getAttributes()));
+ AttributeSet OldAttrs = OldFunc->getAttributes();
+ // Clone any argument attributes that are present in the VMap.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end();
+ I != E; ++I)
+ if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs =
+ OldAttrs.getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
- }
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldAttrs.getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldAttrs.getFnAttributes()));
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index e9828d6..dabb67b9 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -758,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If the call site was an invoke instruction, add a branch to the normal
// destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- BranchInst::Create(II->getNormalDest(), TheCall);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
// If the return instruction returned a value, replace uses of the call with
// uses of the returned value.
@@ -787,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = NULL;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
CalledFunc->getName()+".exit");
} else { // It's a call
@@ -850,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
ReturnInst *RI = Returns[i];
- BranchInst::Create(AfterCallBB, RI);
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
RI->eraseFromParent();
}
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
} else if (!Returns.empty()) {
// Otherwise, if there is exactly one return value, just replace anything
// using the return value of the call with the computed value.
@@ -874,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index be80d34..12e5b3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
/// Dbg Intrinsic utilities
///
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+ // Since we can't guarantee that the original dbg.declare instrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ llvm::BasicBlock::InstListType::iterator PrevI(I);
+ if (PrevI != I->getParent()->getInstList().begin()) {
+ --PrevI;
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+ if (DVI->getValue() == I->getOperand(0) &&
+ DVI->getOffset() == 0 &&
+ DVI->getVariable() == DIVar)
+ return true;
+ }
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
@@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, SI))
+ return true;
+
Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
return true;
}
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
@@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, LI))
+ return true;
+
Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
@@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ // We only remove the dbg.declare intrinsic if all uses are
+ // converted to dbg.value intrinsics.
bool RemoveDDI = true;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c..052ad85 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
+/// \brief Determine if we can hoist sink a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return 0;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return 0;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 10;
+ for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+ RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+ Instruction *CurI = &*RI;
+
+ // Could be calling an instruction that effects memory like free().
+ if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return 0;
+
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ // Found the previous store make sure it stores to the same location.
+ if (SI && SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ else if (SI)
+ return 0; // Unknown store.
+ }
+
+ return 0;
+}
+
/// \brief Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
return false;
- if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
// Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
+ // used in BB. The transformation will prevent the operand from
// being sunk into the use block.
for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
@@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (!HaveRewritablePHIs)
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
llvm::prior(ThenBB->end()));
@@ -3073,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
- Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704..6bea2dd 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
+ // Do not do any of the following transformations if the fprintf return
+ // value is used, in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return 0;
+
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
- Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ return EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
- TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index 5812d46..c3df215 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..544c5ee 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d26154e..08d3725 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to estimate the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
@@ -78,7 +78,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -87,6 +89,7 @@
#include <map>
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
@@ -112,9 +115,9 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// When performing a runtime memory check, do not check more than this
-/// number of pointers. Notice that the check is quadratic!
-static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// When performing memory disambiguation checks at runtime do not make more
+/// than this number of comparisons.
+static const unsigned RuntimeMemoryCheckThreshold = 8;
/// We use a metadata with this name to indicate that a scalar loop was
/// vectorized and that we don't need to re-vectorize it if we run into it
@@ -333,7 +336,7 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0) {}
+ Induction(0), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -343,8 +346,10 @@ public:
RK_IntegerOr, ///< Bitwise or logical OR of numbers.
RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult ///< Product of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
/// This enum represents the kinds of inductions that we support.
@@ -356,21 +361,52 @@ public:
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
+ // This enum represents the kind of minmax reduction.
+ enum MinMaxReductionKind {
+ MRK_Invalid,
+ MRK_UIntMin,
+ MRK_UIntMax,
+ MRK_SIntMin,
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
+ };
+
/// This POD struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
- Kind(RK_NoReduction) {}
+ Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
+ MinMaxReductionKind MK)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
// The starting value of the reduction.
// It does not have to be zero!
- Value *StartValue;
+ TrackingVH<Value> StartValue;
// The instruction who's value is used outside the loop.
Instruction *LoopExitInstr;
// The kind of the reduction.
ReductionKind Kind;
+ // If this a min/max reduction the kind of reduction.
+ MinMaxReductionKind MinMaxKind;
+ };
+
+ /// This POD struct holds information about a potential reduction operation.
+ struct ReductionInstDesc {
+ ReductionInstDesc(bool IsRedux, Instruction *I) :
+ IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+
+ ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
+ IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+
+ // Is this instruction a reduction candidate.
+ bool IsReduction;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current reduction instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern the comparison predicate.
+ MinMaxReductionKind MinMaxKind;
};
// This POD struct holds information about the memory runtime legality
@@ -387,16 +423,18 @@ public:
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
- SmallVector<Value*, 2> Pointers;
+ SmallVector<TrackingVH<Value>, 2> Pointers;
/// Holds the pointer value at the beginning of the loop.
SmallVector<const SCEV*, 2> Starts;
/// Holds the pointer value at the end of the loop.
SmallVector<const SCEV*, 2> Ends;
+ /// Holds the information if this pointer is used for writing to memory.
+ SmallVector<bool, 2> IsWritePtr;
};
/// A POD for saving information about induction variables.
@@ -404,7 +442,7 @@ public:
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
/// Start value.
- Value *StartValue;
+ TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
};
@@ -461,6 +499,10 @@ public:
/// Returns the information that we collected about runtime memory check.
RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+
+ /// This function returns the identity element (or neutral element) for
+ /// the operation K.
+ static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -487,9 +529,17 @@ private:
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
- /// Returns true if the instruction I can be a reduction variable of type
- /// 'Kind'.
- bool isReductionInstr(Instruction *I, ReductionKind Kind);
+ /// Returns a struct describing if the instruction 'I' can be a reduction
+ /// variable of type 'Kind'. If the reduction is a min/max pattern of
+ /// select(icmp()) this function advances the instruction pointer 'I' from the
+ /// compare instruction to the select instruction and stores this pointer in
+ /// 'PatternLastInst' member of the returned struct.
+ ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
+ ReductionInstDesc &Desc);
+ /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+ /// pattern corresponding to a min(X, Y) or max(X, Y).
+ static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
@@ -540,6 +590,8 @@ private:
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -662,6 +714,11 @@ struct LoopVectorize : public LoopPass {
AA = getAnalysisIfAvailable<AliasAnalysis>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ if (DL == NULL) {
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ return false;
+ }
+
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
@@ -737,7 +794,8 @@ struct LoopVectorize : public LoopPass {
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
- Loop *Lp, Value *Ptr) {
+ Loop *Lp, Value *Ptr,
+ bool WritePtr) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
@@ -746,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -906,12 +965,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+
+ if (ScalarAllocatedSize != VectorElementSize)
+ return scalarizeInstruction(Instr);
+
// If the pointer is loop invariant or if it is non consecutive,
// scalarize the load.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
bool UniformLoad = LI && Legal->isUniform(Ptr);
- if (Stride == 0 || UniformLoad)
+ if (!ConsecutiveStride || UniformLoad)
return scalarizeInstruction(Instr);
Constant *Zero = Builder.getInt32(0);
@@ -1110,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
+ // No need to check if two readonly pointers intersect.
+ if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
+ continue;
+
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
@@ -1167,7 +1236,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Mark the old scalar loop with metadata that tells us not to vectorize this
// loop again if we run into it.
- MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
// Some loops have a single integer induction variable, while other loops
@@ -1436,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/// This function returns the identity element (or neutral element) for
/// the operation K.
-static Constant*
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+Constant*
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
- case LoopVectorizationLegality:: RK_IntegerXor:
- case LoopVectorizationLegality:: RK_IntegerAdd:
- case LoopVectorizationLegality:: RK_IntegerOr:
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
// Adding, Xoring, Oring zero to a number does not change it.
return ConstantInt::get(Tp, 0);
- case LoopVectorizationLegality:: RK_IntegerMult:
+ case RK_IntegerMult:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
- case LoopVectorizationLegality:: RK_IntegerAnd:
+ case RK_IntegerAnd:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
- case LoopVectorizationLegality:: RK_FloatMult:
+ case RK_FloatMult:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
- case LoopVectorizationLegality:: RK_FloatAdd:
+ case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
default:
@@ -1566,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
}
/// This function translates the reduction kind to an LLVM binary operator.
-static Instruction::BinaryOps
+static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
switch (Kind) {
case LoopVectorizationLegality::RK_IntegerAdd:
@@ -1583,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
return Instruction::FMul;
case LoopVectorizationLegality::RK_FloatAdd:
return Instruction::FAdd;
+ case LoopVectorizationLegality::RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case LoopVectorizationLegality::RK_FloatMinMax:
+ return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
}
+Value *createMinMaxOp(IRBuilder<> &Builder,
+ LoopVectorizationLegality::MinMaxReductionKind RK,
+ Value *Left,
+ Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max reduction kind");
+ case LoopVectorizationLegality::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case LoopVectorizationLegality::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ Value *Cmp;
+ if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1651,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
- Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
- Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ Value *Identity;
+ Value *VectorStart;
+ if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+ RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ // MinMax reduction have the start value as their identify.
+ VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+ "minmax.ident");
+ } else {
+ Constant *Iden =
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
// Fix the vector-loop phi.
// We created the induction variable so we know that the
@@ -1699,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
+ unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
- "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
+ RdxParts[part], ReducedPartRdx,
+ "bin.rdx");
+ else
+ ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
+ ReducedPartRdx, RdxParts[part]);
}
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
@@ -1727,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
// The result is in the first element of the vector.
@@ -1861,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
-
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
- P->getParent());
- for (unsigned part = 0; part < UF; ++part) {
- VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
- VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
- "predphi");
+ unsigned NumIncoming = P->getNumIncomingValues();
+ assert(NumIncoming > 1 && "Invalid PHI");
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We don't need to 'select' the first PHI operand because it is
+ // the default value if all of the other masks don't match.
+ if (In == 0)
+ Entry[part] = In0[part];
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
+ }
}
continue;
}
@@ -2153,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator()))
return false;
- // We must have at most two predecessors because we need to convert
- // all PHIs to selects.
- unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
- if (Preds > 2)
- return false;
-
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
return false;
@@ -2239,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+/// \brief Check that the instruction has outside loop users and is not an
+/// identified reduction variable.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+ SmallPtrSet<Value *, 4> &Reductions) {
+ // Reduction instructions are allowed to have exit users. All other
+ // instructions must not have external users.
+ if (!Reductions.count(Inst))
+ //Check that all of the users of the loop are inside the BB.
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return true;
+ }
+ }
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
@@ -2250,6 +2409,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr = F.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex,
+ "no-nans-fp-math").getValueAsString() == "true";
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2259,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
// Check that this PHI type is allowed.
if (!Phi->getType()->isIntegerTy() &&
!Phi->getType()->isFloatingPointTy() &&
@@ -2276,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// If this PHINode is not in the header block, then we know that we
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
- if (*bb != Header)
- continue;
+ if (*bb != Header) {
+ // Check that this instruction has no outside users or is an
+ // identified reduction value with an outside user.
+ if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ continue;
+ return false;
+ }
+
+ // We only allow if-converted PHIs with more than two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
@@ -2319,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_IntegerMinMax)) {
+ DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
if (AddReductionVar(Phi, RK_FloatMult)) {
DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
continue;
@@ -2327,6 +2502,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_FloatMinMax)) {
+ DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
@@ -2356,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (!AllowedExit.count(it))
- //Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator I = it->use_begin(), E = it->use_end();
- I != E; ++I) {
- Instruction *U = cast<Instruction>(*I);
- // This user may be a reduction exit value.
- if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
- return false;
- }
- }
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
} // next instr.
}
@@ -2446,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
bool LoopVectorizationLegality::canVectorizeMemory() {
- if (TheLoop->isAnnotatedParallel()) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
@@ -2461,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2475,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayReadFromMemory()) {
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
- if (!Ld->isSimple()) {
+ if (!Ld->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -2487,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) return false;
- if (!St->isSimple()) {
+ if (!St->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -2534,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
ReadWrites.insert(std::make_pair(Ptr, ST));
}
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
@@ -2556,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
+ unsigned NumReadPtrs = 0;
+ unsigned NumWritePtrs = 0;
+
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
@@ -2563,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, true);
+ NumWritePtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2573,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, false);
+ NumReadPtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2583,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or found a
// unsizeable pointer.
- if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
+ DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
}
@@ -2646,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
return false;
}
@@ -2690,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
return false;
}
}
@@ -2737,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// used as reduction variables (such as ADD). We may have a single
// out-of-block user. The cycle must end with the original PHI.
Instruction *Iter = Phi;
- while (true) {
+
+ // To recognize min/max patterns formed by a icmp select sequence, we store
+ // the number of instruction we saw from the recognized min/max pattern,
+ // such that we don't stop when we see the phi has two uses (one by the select
+ // and one by the icmp) and to make sure we only see exactly the two
+ // instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ ReductionInstDesc ReduxDesc(false, 0);
+
+ // Avoid cycles in the chain.
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ while (VisitedInsts.insert(Iter)) {
// If the instruction has no users then this is a broken
// chain and can't be a reduction variable.
if (Iter->use_empty())
@@ -2751,9 +2942,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Is this a bin op ?
FoundBinOp |= !isa<PHINode>(Iter);
- // Remember the current instruction.
- Instruction *OldIter = Iter;
-
// For each of the *users* of iter.
for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
it != e; ++it) {
@@ -2782,25 +2970,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
Iter->hasNUsesOrMore(2))
continue;
- // We can't have multiple inside users.
- if (FoundInBlockUser)
+ // We can't have multiple inside users except for a combination of
+ // icmp/select both using the phi.
+ if (FoundInBlockUser && !NumCmpSelectPatternInst)
return false;
FoundInBlockUser = true;
// Any reduction instr must be of one of the allowed kinds.
- if (!isReductionInstr(U, Kind))
+ ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
return false;
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+
// Reductions of instructions such as Div, and Sub is only
// possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+ if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
+ !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
return false;
- Iter = U;
+ Iter = ReduxDesc.PatternLastInst;
}
- // If all uses were skipped this can't be a reduction variable.
- if (Iter == OldIter)
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
return false;
// We found a reduction var if we have reached the original
@@ -2811,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
AllowedExit.insert(ExitInstruction);
// Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
Reductions[Phi] = RD;
// We've ended the cycle. This is a reduction variable if we have an
// outside user and it has a binary op.
return FoundBinOp && ExitInstruction;
}
}
+
+ return false;
}
-bool
+/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+/// pattern corresponding to a min(X, Y) or max(X, Y).
+LoopVectorizationLegality::ReductionInstDesc
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = 0;
+ SelectInst *Select = 0;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(Select, Prev.MinMaxKind);
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return ReductionInstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return ReductionInstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return ReductionInstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+
+ return ReductionInstDesc(false, I);
+}
+
+LoopVectorizationLegality::ReductionInstDesc
LoopVectorizationLegality::isReductionInstr(Instruction *I,
- ReductionKind Kind) {
+ ReductionKind Kind,
+ ReductionInstDesc &Prev) {
bool FP = I->getType()->isFloatingPointTy();
bool FastMath = (FP && I->isCommutative() && I->isAssociative());
-
switch (I->getOpcode()) {
default:
- return false;
+ return ReductionInstDesc(false, I);
case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
- return false;
- // possibly.
- return true;
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+ Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
case Instruction::Add:
- return Kind == RK_IntegerAdd;
- case Instruction::SDiv:
- case Instruction::UDiv:
+ return ReductionInstDesc(Kind == RK_IntegerAdd, I);
case Instruction::Mul:
- return Kind == RK_IntegerMult;
+ return ReductionInstDesc(Kind == RK_IntegerMult, I);
case Instruction::And:
- return Kind == RK_IntegerAnd;
+ return ReductionInstDesc(Kind == RK_IntegerAnd, I);
case Instruction::Or:
- return Kind == RK_IntegerOr;
+ return ReductionInstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
- return Kind == RK_IntegerXor;
+ return ReductionInstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return Kind == RK_FloatMult && FastMath;
+ return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
- return Kind == RK_FloatAdd && FastMath;
- }
+ return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
}
LoopVectorizationLegality::InductionKind
@@ -3384,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
// Scalarized loads/stores.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
- if (0 == Stride) {
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000..cc30cc9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,348 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+ cl::desc("Only vectorize trees if the gain is above this "
+ "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public FunctionPass {
+ typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit SLPVectorizer() : FunctionPass(ID) {
+ initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+
+ virtual bool runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+
+ StoreRefs.clear();
+ bool Changed = false;
+
+ // Must have DataLayout. We can't require it because some tests run w/o
+ // triple.
+ if (!DL)
+ return false;
+
+ for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+ BasicBlock *BB = it;
+ bool BBChanged = false;
+
+ // Use the bollom up slp vectorizer to construct chains that start with
+ // he store instructions.
+ BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
+
+ // Vectorize trees that end at reductions.
+ BBChanged |= vectorizeReductions(BB, R);
+
+ // Vectorize trees that end at stores.
+ if (unsigned count = collectStores(BB, R)) {
+ (void)count;
+ DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
+ BBChanged |= vectorizeStoreChains(R);
+ }
+
+ // Try to hoist some of the scalarization code to the preheader.
+ if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+ Changed |= BBChanged;
+ }
+
+ if (Changed) {
+ DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+ DEBUG(verifyFunction(F));
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<LoopInfo>();
+ }
+
+private:
+
+ /// \brief Collect memory references and sort them according to their base
+ /// object. We sort the stores to their base objects to reduce the cost of the
+ /// quadratic search on the stores. TODO: We can further reduce this cost
+ /// if we flush the chain creation every time we run into a memory barrier.
+ unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
+
+ /// \brief Try to vectorize a list of operands.
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+
+ /// \brief Try to vectorize a chain that may start at the operands of \V;
+ bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
+
+ /// \brief Vectorize the stores that were collected in StoreRefs.
+ bool vectorizeStoreChains(BoUpSLP &R);
+
+ /// \brief Try to hoist gather sequences outside of the loop in cases where
+ /// all of the sources are loop invariant.
+ void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Scan the basic block and look for reductions that may start a
+ /// vectorization chain.
+ bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+ StoreListMap StoreRefs;
+};
+
+unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+ unsigned count = 0;
+ StoreRefs.clear();
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ Type *Ty = SI->getValueOperand()->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+ return 0;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
+ count++;
+ }
+ return count;
+}
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+ if (!A || !B) return false;
+ Value *VL[] = { A, B };
+ return tryToVectorizeList(VL, R);
+}
+
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+ DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+
+ // Check that all of the parts are scalar.
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ Type *Ty = VL[i]->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+ return 0;
+ }
+
+ int Cost = R.getTreeCost(VL);
+ int ExtrCost = R.getScalarizationCost(VL);
+ DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+ " Cost of extract:" << ExtrCost << ".\n");
+ if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+ DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+ R.vectorizeArith(VL);
+ return true;
+}
+
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+ if (!V) return false;
+ // Try to vectorize V.
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ return true;
+
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ // Try to skip B.
+ if (B && B->hasOneUse()) {
+ BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (tryToVectorizePair(A, B0, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A, B1, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ }
+
+ // Try to skip A.
+ if (A && A->hasOneUse()) {
+ BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (tryToVectorizePair(A0, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A1, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ }
+ return 0;
+}
+
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+ bool Changed = false;
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ if (isa<DbgInfoIntrinsic>(it)) continue;
+
+ // Try to vectorize reductions that use PHINodes.
+ if (PHINode *P = dyn_cast<PHINode>(it)) {
+ // Check that the PHI is a reduction PHI.
+ if (P->getNumIncomingValues() != 2) return Changed;
+ Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+ (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+ 0));
+ // Check if this is a Binary Operator.
+ BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+ if (!BI)
+ continue;
+
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P) Inst = BI->getOperand(1);
+ Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ continue;
+ }
+
+ // Try to vectorize trees that start at compare instructions.
+ if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+ if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+ Changed |= true;
+ continue;
+ }
+ for (int i = 0; i < 2; ++i)
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+ Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
+
+ DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+ it->second.size() << ".\n");
+
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ }
+ return Changed;
+}
+
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+ BoUpSLP &R) {
+ // Check if this block is inside a loop.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ // Check if it has a preheader.
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ if (!PreHeader)
+ return;
+
+ // Mark the insertion point for the block.
+ Instruction *Location = PreHeader->getTerminator();
+
+ BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+ for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+ it != e; ++it) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+ // The InsertElement sequence can be simplified into a constant.
+ if (!Insert)
+ continue;
+
+ // If the vector or the element that we insert into it are
+ // instructions that are defined in this basic block then we can't
+ // hoist this instruction.
+ Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+ Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (CurrVec && L->contains(CurrVec)) continue;
+ if (NewElem && L->contains(NewElem)) continue;
+
+ // We can hoist this instruction. Move it to the pre-header.
+ Insert->moveBefore(Location);
+ }
+}
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createSLPVectorizerPass() {
+ return new SLPVectorizer();
+ }
+}
+
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 0000000..9b94366
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,730 @@
+//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "SLP"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 6;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
+ BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
+ numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+ int Loc = 0;
+ InstrIdx.clear();
+ InstrVec.clear();
+ // Number the instructions in the block.
+ for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
+ InstrIdx[it] = Loc++;
+ InstrVec.push_back(it);
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+ }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+ return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+ if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
+ if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
+ return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+ // Check that A and B are of the same type.
+ if (PtrA->getType() != PtrB->getType()) return false;
+
+ // Calculate the distance.
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+ const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+ // Non constant distance.
+ if (!ConstOffSCEV) return false;
+
+ int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+ // The Instructions are connsecutive if the size of the first load/store is
+ // the same as the offset.
+ int64_t Sz = DL->getTypeStoreSize(Ty);
+ return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
+ Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+ unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ unsigned VF = MinVecRegSize / Sz;
+
+ if (!isPowerOf2_32(Sz) || VF < 2) return false;
+
+ bool Changed = false;
+ // Look for profitable vectorizable trees at all offsets, starting at zero.
+ for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+ if (i + VF > e) return Changed;
+ DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+ ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+ int Cost = getTreeCost(Operands);
+ DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ if (Cost < CostThreshold) {
+ DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ vectorizeTree(Operands, VF);
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
+ ValueSet Heads, Tails;
+ SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we vectorized so that we don't visit the same store twice.
+ ValueSet VectorizedStores;
+ bool Changed = false;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of loads that follow each other.
+ for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+ for (unsigned j = 0; j < e; ++j) {
+ if (i == j) continue;
+ if (isConsecutiveAccess(Stores[i], Stores[j])) {
+ Tails.insert(Stores[j]);
+ Heads.insert(Stores[i]);
+ ConsecutiveChain[Stores[i]] = Stores[j];
+ }
+ }
+
+ // For stores that start but don't end a link in the chain:
+ for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ if (Tails.count(*it)) continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to vectorize it.
+ ValueList Operands;
+ Value *I = *it;
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (VectorizedStores.count(I)) break;
+ Operands.push_back(I);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+ // Mark the vectorized stores so that we don't vectorize them again.
+ if (Vectorized)
+ VectorizedStores.insert(Operands.begin(), Operands.end());
+ Changed |= Vectorized;
+ }
+
+ return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
+ // Find the type of the operands in VL.
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ // Find the cost of inserting/extracting values from the vector.
+ return getScalarizationCost(VecTy);
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+ int Cost = 0;
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+ BasicBlock::iterator I = Src, E = Dst;
+ /// Scan all of the instruction from SRC to DST and check if
+ /// the source may alias.
+ for (++I; I != E; ++I) {
+ // Ignore store instructions that are marked as 'ignore'.
+ if (MemBarrierIgnoreList.count(I)) continue;
+ if (Src->mayWriteToMemory()) /* Write */ {
+ if (!I->mayReadOrWriteMemory()) continue;
+ } else /* Read */ {
+ if (!I->mayWriteToMemory()) continue;
+ }
+ AliasAnalysis::Location A = getLocation(&*I);
+ AliasAnalysis::Location B = getLocation(Src);
+
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+ return I;
+ }
+ return 0;
+}
+
+void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+ Value *Vec = vectorizeTree(Operands, Operands.size());
+ BasicBlock::iterator Loc = cast<Instruction>(Vec);
+ IRBuilder<> Builder(++Loc);
+ // After vectorizing the operands we need to generate extractelement
+ // instructions and replace all of the uses of the scalar values with
+ // the values that we extracted from the vectorized tree.
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
+ Operands[i]->replaceAllUsesWith(S);
+ }
+}
+
+int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
+ // Get rid of the list of stores that were removed, and from the
+ // lists of instructions with multiple users.
+ MemBarrierIgnoreList.clear();
+ LaneMap.clear();
+ MultiUserVals.clear();
+ MustScalarize.clear();
+
+ // Scan the tree and find which value is used by which lane, and which values
+ // must be scalarized.
+ getTreeUses_rec(VL, 0);
+
+ // Check that instructions with multiple users can be vectorized. Mark unsafe
+ // instructions.
+ for (ValueSet::iterator it = MultiUserVals.begin(),
+ e = MultiUserVals.end(); it != e; ++it) {
+ // Check that all of the users of this instr are within the tree
+ // and that they are all from the same lane.
+ int Lane = -1;
+ for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
+ I != E; ++I) {
+ if (LaneMap.find(*I) == LaneMap.end()) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
+ " to MustScalarize because of an out of tree usage.\n");
+ break;
+ }
+ if (Lane == -1) Lane = LaneMap[*I];
+ if (Lane != LaneMap[*I]) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"Adding " << **it <<
+ " to MustScalarize because multiple lane use it: "
+ << Lane << " and " << LaneMap[*I] << ".\n");
+ break;
+ }
+ }
+ }
+
+ // Now calculate the cost of vectorizing the tree.
+ return getTreeCost_rec(VL, 0);
+}
+
+void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ if (Depth == RecursionMaxDepth) return;
+
+ // Don't handle vectors.
+ if (VL[0]->getType()->isVectorTy()) return;
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ if (SI->getValueOperand()->getType()->isVectorTy()) return;
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If one of the instructions is out of this BB, we need to scalarize all.
+ if (I && I->getParent() != BB) return;
+ }
+
+ // If all of the operands are identical or constant we have a simple solution.
+ if (AllConst || AllSameScalar) return;
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return;
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return;
+ }
+
+ // Mark instructions with multiple users.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Remember to check if all of the users of this instr are vectorized
+ // within our tree.
+ if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
+ }
+
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ // Check that the instruction is only used within
+ // one lane.
+ if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
+ // Make this instruction as 'seen' and remember the lane.
+ LaneMap[VL[i]] = i;
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ getTreeUses_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::Store: {
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ getTreeUses_rec(Operands, Depth+1);
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ Type *ScalarTy = VL[0]->getType();
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+
+ /// Don't mess with vectors.
+ if (ScalarTy->isVectorTy()) return max_cost;
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ bool MustScalarizeFlag = false;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // Must have a single use.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ MustScalarizeFlag |= MustScalarize.count(VL[i]);
+ // This instruction is outside the basic block.
+ if (I && I->getParent() != BB)
+ return getScalarizationCost(VecTy);
+ }
+
+ // Is this a simple vector constant.
+ if (AllConst) return 0;
+
+ // If all of the operands are identical we can broadcast them.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (AllSameScalar) {
+ // If we are in a loop, and this is not an instruction (e.g. constant or
+ // argument) or the instruction is defined outside the loop then assume
+ // that the cost is zero.
+ if (L && (!VL0 || !L->contains(VL0)))
+ return 0;
+
+ // We need to broadcast the scalar.
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+ }
+
+ // If this is not a constant, or a scalar from outside the loop then we
+ // need to scalarize it.
+ if (MustScalarizeFlag)
+ return getScalarizationCost(VecTy);
+
+ if (!VL0) return getScalarizationCost(VecTy);
+ assert(VL0->getParent() == BB && "Wrong BB");
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+ }
+
+ // Check if it is safe to sink the loads or the stores.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ int MaxIdx = InstrIdx[VL0];
+ for (unsigned i = 1, e = VL.size(); i < e; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+ Instruction *Last = InstrVec[MaxIdx];
+ for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
+ if (VL[i] == Last) continue;
+ Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+ if (Barrier) {
+ DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
+ *Last << "\n because of " << *Barrier << "\n");
+ return max_cost;
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ int Cost = 0;
+ ValueList Operands;
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ // Check that the casted type is the same for all users.
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+ return getScalarizationCost(VecTy);
+ }
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+ VL0->getType(), SrcTy);
+
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ int Cost = 0;
+ // Calculate the cost of all of the operands.
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+ }
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+
+ int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Load: {
+ // If we are scalarize the loads, add the cost of forming the vector.
+ for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i], VL[i+1]))
+ return getScalarizationCost(VecTy);
+
+ // Cost of wide load - cost of scalar loads.
+ int ScalarLdCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ return VecLdCost - ScalarLdCost;
+ }
+ case Instruction::Store: {
+ // We know that we can merge the stores. Calculate the cost.
+ int ScalarStCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
+ int StoreCost = VecStCost - ScalarStCost;
+
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ MemBarrierIgnoreList.insert(VL[j]);
+ }
+
+ int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
+ return TotalCost;
+ }
+ default:
+ // Unable to vectorize unknown instructions.
+ return getScalarizationCost(VecTy);
+ }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+ int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
+ for (unsigned i = 0; i < VF; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+ return InstrVec[MaxIdx + 1];
+}
+
+Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
+ IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
+ Value *Vec = UndefValue::get(Ty);
+ for (unsigned i=0; i < Ty->getNumElements(); ++i) {
+ // Generate the 'InsertElement' instruction.
+ Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ // Remember that this instruction is used as part of a 'gather' sequence.
+ // The caller of the bottom-up slp vectorizer can try to hoist the sequence
+ // if the users are outside of the basic block.
+ GatherInstructions.push_back(Vec);
+ }
+
+ return Vec;
+}
+
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
+ Value *V = vectorizeTree_rec(VL, VF);
+ // We moved some instructions around. We have to number them again
+ // before we can do any analysis.
+ numberInstructions();
+ MustScalarize.clear();
+ return V;
+}
+
+Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VF);
+
+ // Check if all of the operands are constants or identical.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // The instruction must be in the same BB, and it must be vectorizable.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+ return Scalarize(VL, VecTy);
+ }
+
+ // Check that this is a simple vector constant.
+ if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return Scalarize(VL, VecTy);
+
+ if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ ValueList INVL;
+ for (int i = 0; i < VF; ++i)
+ INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Value *InVec = vectorizeTree_rec(INVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ CastInst *CI = dyn_cast<CastInst>(VL0);
+ Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0; i < VF; ++i) {
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ Value *RHS = vectorizeTree_rec(RHSVL, VF);
+ Value *LHS = vectorizeTree_rec(LHSVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(VL0);
+ unsigned Alignment = LI->getAlignment();
+
+ // Check if all of the loads are consecutive.
+ for (unsigned i = 1, e = VF; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i-1], VL[i]))
+ return Scalarize(VL, VecTy);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = Builder.CreateLoad(VecPtr);
+ LI->setAlignment(Alignment);
+ VectorizedValues[VL0] = LI;
+ return LI;
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(VL0);
+ unsigned Alignment = SI->getAlignment();
+
+ ValueList ValueOp;
+ for (int i = 0; i < VF; ++i)
+ ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+
+ Value *VecValue = vectorizeTree_rec(ValueOp, VF);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo());
+ Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+
+ for (int i = 0; i < VF; ++i)
+ cast<Instruction>(VL[i])->eraseFromParent();
+ return 0;
+ }
+ default:
+ Value *S = Scalarize(VL, VecTy);
+ VectorizedValues[VL0] = S;
+ return S;
+ }
+}
+
+} // end of namespace
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 0000000..5456c6c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,164 @@
+//===- VecUtils.h - Vectorization Utilities -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+class Loop;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP {
+ typedef SmallVector<Value*, 8> ValueList;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ typedef SmallVector<StoreInst*, 8> StoreList;
+ static const int max_cost = 1<<20;
+
+ // \brief C'tor.
+ BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
+
+ /// \brief Take the pointer operand from the Load/Store instruction.
+ /// \returns NULL if this is not a valid Load/Store instruction.
+ static Value *getPointerOperand(Value *I);
+
+ /// \brief Take the address space operand from the Load/Store instruction.
+ /// \returns -1 if this is not a valid Load/Store instruction.
+ static unsigned getAddressSpaceOperand(Value *I);
+
+ /// \returns true if the memory operations A and B are consecutive.
+ bool isConsecutiveAccess(Value *A, Value *B);
+
+ /// \brief Vectorize the tree that starts with the elements in \p VL.
+ /// \returns the vectorized value.
+ Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
+
+ /// \returns the vectorization cost of the subtree that starts at \p VL.
+ /// A negative number means that this is profitable.
+ int getTreeCost(ArrayRef<Value *> VL);
+
+ /// \returns the scalarization cost for this list of values. Assuming that
+ /// this subtree gets vectorized, we may need to extract the values from the
+ /// roots. This method calculates the cost of extracting the values.
+ int getScalarizationCost(ArrayRef<Value *> VL);
+
+ /// \brief Attempts to order and vectorize a sequence of stores. This
+ /// function does a quadratic scan of the given stores.
+ /// \returns true if the basic block was modified.
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
+
+ /// \brief Vectorize a group of scalars into a vector tree.
+ void vectorizeArith(ArrayRef<Value *> Operands);
+
+ /// \returns the list of new instructions that were added in order to collect
+ /// scalars into vectors. This list can be used to further optimize the gather
+ /// sequences.
+ ValueList &getGatherSeqInstructions() {return GatherInstructions; }
+
+private:
+ /// \brief This method contains the recursive part of getTreeCost.
+ int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+ /// \brief This recursive method looks for vectorization hazards such as
+ /// values that are used by multiple users and checks that values are used
+ /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+ void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+ /// \brief This method contains the recursive part of vectorizeTree.
+ Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
+
+ /// \brief Number all of the instructions in the block.
+ void numberInstructions();
+
+ /// \brief Vectorize a sorted sequence of stores.
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
+
+ /// \returns the scalarization cost for this type. Scalarization in this
+ /// context means the creation of vectors from a group of scalars.
+ int getScalarizationCost(Type *Ty);
+
+ /// \returns the AA location that is being access by the instruction.
+ AliasAnalysis::Location getLocation(Instruction *I);
+
+ /// \brief Checks if it is possible to sink an instruction from
+ /// \p Src to \p Dst.
+ /// \returns the pointer to the barrier instruction if we can't sink.
+ Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+ /// \returns the instruction that appears last in the BB from \p VL.
+ /// Only consider the first \p VF elements.
+ Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+
+ /// \returns a vector from a collection of scalars in \p VL.
+ Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
+
+private:
+ /// Maps instructions to numbers and back.
+ SmallDenseMap<Value*, int> InstrIdx;
+ /// Maps integers to Instructions.
+ std::vector<Instruction*> InstrVec;
+
+ // -- containers that are used during getTreeCost -- //
+
+ /// Contains values that must be scalarized because they are used
+ /// by multiple lanes, or by users outside the tree.
+ /// NOTICE: The vectorization methods also use this set.
+ ValueSet MustScalarize;
+
+ /// Contains a list of values that are used outside the current tree. This
+ /// set must be reset between runs.
+ ValueSet MultiUserVals;
+ /// Maps values in the tree to the vector lanes that uses them. This map must
+ /// be reset between runs of getCost.
+ std::map<Value*, int> LaneMap;
+ /// A list of instructions to ignore while sinking
+ /// memory instructions. This map must be reset between runs of getCost.
+ SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+
+ // -- Containers that are used during vectorizeTree -- //
+
+ /// Maps between the first scalar to the vector. This map must be reset
+ ///between runs.
+ DenseMap<Value*, Value*> VectorizedValues;
+
+ // -- Containers that are used after vectorization by the caller -- //
+
+ /// A list of instructions that are used when gathering scalars into vectors.
+ /// In many cases these instructions can be hoisted outside of the BB.
+ /// Iterating over this list is faster than calling LICM.
+ ValueList GatherInstructions;
+
+ // Analysis and block reference.
+ BasicBlock *BB;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ Loop *L;
+};
+
+} // end of namespace
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2..a927fe1 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
- //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,6 +28,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
+ initializeSLPVectorizerPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopVectorizePass());
}
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSLPVectorizerPass());
+}
OpenPOWER on IntegriCloud