author    rdivacky <rdivacky@FreeBSD.org>  2010-01-01 10:31:22 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2010-01-01 10:31:22 +0000
commit    a16c51cee9225a354c999dd1076d5dba2aa79807 (patch)
tree      dba00119388b84f9f44e6ec5e9129f807fd79ca3 /lib/Transforms
parent    40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (diff)
Update LLVM to 92395.
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/Hello.cpp                  |   2
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp             |  10
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp        |  16
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                   |  71
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp        |   2
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp  | 343
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp         |   1
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                  |   4
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp        |   4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp    | 167
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp          |   4
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp           | 604
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp                 |   1
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp  | 885
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp       |  72
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp      | 114
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp     |   4
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp          |  49
-rw-r--r--  lib/Transforms/Utils/Local.cpp                  |   4
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp           |   8
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp             |   8
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp             |  30
22 files changed, 1403 insertions(+), 1000 deletions(-)
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 91534a7..eac4e17 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -56,7 +56,7 @@ namespace {
// We don't modify the program, so we preserve all analyses
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- };
+ }
};
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 0b5e007..ae88d9e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -24,7 +24,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -220,17 +219,14 @@ static bool StripDebugInfo(Module &M) {
Changed = true;
NMD->eraseFromParent();
}
- MetadataContext &TheMetadata = M.getContext().getMetadata();
- unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
- if (!MDDbgKind)
- return Changed;
-
+
+ unsigned MDDbgKind = M.getMDKindID("dbg");
for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
++FI)
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
++BI)
- TheMetadata.removeMD(MDDbgKind, BI);
+ BI->setMetadata(MDDbgKind, 0);
return true;
}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index e4c4ae5..372616c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -48,7 +48,7 @@ namespace {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
- ProfileInfo *PI;
+ ProfileInfo *PFI;
/// BackEdges - Keep a set of all the loop back edges.
///
@@ -99,7 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
- PI = getAnalysisIfAvailable<ProfileInfo>();
+ PFI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -288,9 +288,9 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (PI) {
- PI->replaceAllUses(BB, DestBB);
- PI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ if (PFI) {
+ PFI->replaceAllUses(BB, DestBB);
+ PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
@@ -368,9 +368,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
// If we found a workable predecessor, change TI to branch to Succ.
if (FoundMatch) {
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PI)
- PI->splitEdge(TIBB, Dest, Pred);
+ ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PFI)
+ PFI->splitEdge(TIBB, Dest, Pred);
Dest->removePredecessor(TIBB);
TI->setSuccessor(SuccNum, Pred);
return;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 222792b..612b415 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -20,6 +20,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
@@ -48,7 +49,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include <cstdio>
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -733,13 +733,13 @@ static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
- printf("{\n");
+ errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
- printf("%d\n", I->first);
+ errs() << I->first << "\n";
I->second->dump();
}
- printf("}\n");
+ errs() << "}\n";
}
static bool isSafeReplacement(PHINode* p, Instruction *inst) {
@@ -1278,6 +1278,32 @@ struct AvailableValueInBlock {
assert(!isSimpleValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(const Type *LoadTy,
+ const TargetData *TD) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+ }
};
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1286,7 +1312,15 @@ struct AvailableValueInBlock {
static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
const TargetData *TD,
+ const DominatorTree &DT,
AliasAnalysis *AA) {
+ // Check for the fully redundant, dominating load case. In this case, we can
+ // just use the dominating value directly.
+ if (ValuesPerBlock.size() == 1 &&
+ DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+
+ // Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI);
@@ -1300,28 +1334,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- unsigned Offset = AV.Offset;
-
- Value *AvailableVal;
- if (AV.isSimpleValue()) {
- AvailableVal = AV.getSimpleValue();
- if (AvailableVal->getType() != LoadTy) {
- assert(TD && "Need target data to handle type mismatch case");
- AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
- BB->getTerminator(), *TD);
-
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *AV.getSimpleValue() << '\n'
- << *AvailableVal << '\n' << "\n\n\n");
- }
- } else {
- AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *AV.getMemIntrinValue() << '\n'
- << *AvailableVal << '\n' << "\n\n\n");
- }
- SSAUpdate.AddAvailableValue(BB, AvailableVal);
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
}
// Perform PHI construction.
@@ -1346,7 +1359,7 @@ static bool isLifetimeStart(Instruction *Inst) {
bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
- SmallVector<NonLocalDepEntry, 64> Deps;
+ SmallVector<NonLocalDepResult, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
//DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
@@ -1490,7 +1503,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
@@ -1679,7 +1692,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 2912421..3aa4fd3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -258,7 +258,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// Check that InVal is defined in the loop.
Instruction *Inst = cast<Instruction>(InVal);
- if (!L->contains(Inst->getParent()))
+ if (!L->contains(Inst))
continue;
// Okay, this instruction has a user outside of the current loop
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index b9c536f..516d72e 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -75,6 +75,15 @@ STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
namespace {
/// InstCombineWorklist - This is the worklist management logic for
/// InstCombine.
@@ -257,7 +266,8 @@ namespace {
ConstantInt *RHS);
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
-
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
@@ -280,6 +290,9 @@ namespace {
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
@@ -648,6 +661,57 @@ static inline Value *dyn_castFNegVal(Value *V) {
return 0;
}
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
+
/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to). This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
@@ -732,12 +796,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
APInt MulExt = LHSExt * RHSExt;
- if (sign) {
- APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
- APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
- return MulExt.slt(Min) || MulExt.sgt(Max);
- } else
+ if (!sign)
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
}
@@ -2736,9 +2800,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Op0 == Op1) // sub X, X -> 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- // If this is a 'B = x-(-A)', change to B = x+A.
- if (Value *V = dyn_castNegVal(Op1))
- return BinaryOperator::CreateAdd(Op0, V);
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
if (isa<UndefValue>(Op0))
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
@@ -6356,24 +6424,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// comparison into the select arms, which will cause one to be
// constant folded and the select turned into a bitwise or.
Value *Op1 = 0, *Op2 = 0;
- if (LHSI->hasOneUse()) {
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
- // Fold the known value into the constant operand.
- Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- // Insert a new ICmp of the other select operand.
- Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
- RHSC, I.getName());
- } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
- // Fold the known value into the constant operand.
- Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- // Insert a new ICmp of the other select operand.
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
RHSC, I.getName());
- }
- }
-
- if (Op1)
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
break;
}
case Instruction::Call:
@@ -6452,7 +6522,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if (X) ...
// For generality, we handle any zero-extension of any operand comparison
// with a constant or another cast from the same type.
- if (isa<ConstantInt>(Op1) || isa<CastInst>(Op1))
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
if (Instruction *R = visitICmpInstWithCastAndCast(I))
return R;
}
@@ -6598,9 +6668,112 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
}
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
return Changed ? &I : 0;
}
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
+ bool isNUW = false, isNSW = false;
+ if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
+ isNUW = Add->hasNoUnsignedWrap();
+ isNSW = Add->hasNoSignedWrap();
+ }
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similiarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X != 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ // If this is an NUW add, then this is always false.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+ // If this is an NUW add, then this is always true.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ }
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always false, if negative, this is always true.
+ if (isNSW) {
+ bool isTrue = CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ }
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always true, if negative, this is always false.
+ if (isNSW) {
+ bool isTrue = !CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
/// and CmpRHS are both known to be integer constants.
@@ -7075,8 +7248,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
break;
case Instruction::Add:
- // Fold: icmp pred (add, X, C1), C2
-
+ // Fold: icmp pred (add X, C1), C2
if (!ICI.isEquality()) {
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!LHSC) break;
@@ -7299,19 +7471,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// If the re-extended constant didn't change...
if (Res2 == CI) {
- // Make sure that sign of the Cmp and the sign of the Cast are the same.
- // For example, we might have:
- // %A = sext i16 %X to i32
- // %B = icmp ugt i32 %A, 1330
- // It is incorrect to transform this into
- // %B = icmp ugt i16 %X, 1330
- // because %A may have negative value.
- //
- // However, we allow this when the compare is EQ/NE, because they are
- // signless.
- if (isSignedExt == isSignedCmp || ICI.isEquality())
+ // Deal with equality cases early.
+ if (ICI.isEquality())
return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
- return 0;
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
}
// The re-extended constant changed so the constant cannot be represented
@@ -9372,9 +9542,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
return ReplaceInstUsesWith(SI, TrueVal);
/// NOTE: if we wanted to, this is where to detect integer MIN/MAX
}
-
- /// NOTE: if we wanted to, this is where to detect integer ABS
-
return Changed ? &SI : 0;
}
@@ -9416,6 +9583,35 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
return false;
}
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+ // MAX(MAX(A, B), B) -> MAX(A, B)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+
+
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -9622,9 +9818,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we can fold the select into one of our operands.
if (SI.getType()->isInteger()) {
- Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal);
- if (FoldI)
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
}
// See if we can fold the select into a phi node if the condition is a select.
@@ -9896,9 +10111,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
Changed = true;
}
+ }
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
// memmove(x,x,size) -> noop.
- if (MMI->getSource() == MMI->getDest())
+ if (MTI->getSource() == MTI->getDest())
return EraseInstFromFunction(CI);
}
@@ -9923,6 +10140,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Operand->getIntrinsicID() == Intrinsic::bswap)
return ReplaceInstUsesWith(CI, Operand->getOperand(1));
break;
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getOperand(1));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getOperand(1));
+ }
+ break;
+
case Intrinsic::uadd_with_overflow: {
Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
@@ -11232,6 +11464,23 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
PHINode *PN = PHIsToSlice[PHIId];
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be split the critical
+ // edge which is required to insert a truncate. Because of this, we have to
+ // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have a phi, and if it's directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
@@ -11314,7 +11563,9 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
PredVal = EltPHI;
EltPHI->addIncoming(PredVal, Pred);
continue;
- } else if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
// If the incoming value was a PHI, and if it was one of the PHIs we
// already rewrote it, just use the lowered value.
if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index d58b9c9..7e6cf79 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 42a8fdc..99f3ae0 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -433,7 +433,7 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
if (PN->getIncomingValue(i) == &I)
if (CurLoop->contains(PN->getIncomingBlock(i)))
return false;
- } else if (CurLoop->contains(User->getParent())) {
+ } else if (CurLoop->contains(User)) {
return false;
}
}
@@ -831,7 +831,7 @@ void LICM::FindPromotableValuesInLoop(
UI != UE; ++UI) {
// Ignore instructions not in this loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
- if (!Use || !CurLoop->contains(Use->getParent()))
+ if (!Use || !CurLoop->contains(Use))
continue;
if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 8b6a233..1d9dd68 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -288,7 +288,7 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
static bool isUsedOutsideLoop(Value *V, Loop *L) {
for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (!L->contains(cast<Instruction>(*UI)->getParent()))
+ if (!L->contains(cast<Instruction>(*UI)))
return true;
return false;
}
@@ -842,7 +842,7 @@ void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
UI != E; ++UI)
if (PHINode *U = dyn_cast<PHINode>(*UI))
- if (LP->contains(U->getParent())) {
+ if (LP->contains(U)) {
NewV = U;
break;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 85cc712..85f7368 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -144,7 +144,7 @@ namespace {
/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
/// single stride of IV. All of the users may have different starting
/// values, and this may not be the only stride.
- void StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ void StrengthReduceIVUsersOfStride(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L);
void StrengthReduceIVUsers(Loop *L);
@@ -157,14 +157,14 @@ namespace {
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
+ const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
IVExpr&, const Type*,
const std::vector<BasedUser>& UsersToProcess);
bool ValidScale(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
bool ValidOffset(bool, int64_t, int64_t,
const std::vector<BasedUser>& UsersToProcess);
- const SCEV *CollectIVUsers(const SCEV *const &Stride,
+ const SCEV *CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
@@ -212,8 +212,6 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
- if (DeadInsts.empty()) return;
-
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
@@ -232,44 +230,6 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
-/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
- // This is very common, put it first.
- if (isa<SCEVConstant>(S))
- return false;
- if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (unsigned int i=0; i< AE->getNumOperands(); i++)
- if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
- return true;
- return false;
- }
- if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
- if (const Loop *newLoop = AE->getLoop()) {
- if (newLoop == L)
- return false;
- // if newLoop is an outer loop of L, this is OK.
- if (newLoop->contains(L->getHeader()))
- return false;
- }
- return true;
- }
- if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
- // need this when it is.
- if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#endif
- if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
- return containsAddRecFromDifferentLoop(CE->getOperand(), L);
- return false;
-}
-
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
@@ -362,13 +322,13 @@ namespace {
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
- void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
+ void RewriteInstructionToUseNewBase(const SCEV *NewBase,
Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<WeakVH> &DeadInsts,
ScalarEvolution *SE);
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+ Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP,
@@ -378,12 +338,12 @@ namespace {
}
void BasedUser::dump() const {
- errs() << " Base=" << *Base;
- errs() << " Imm=" << *Imm;
- errs() << " Inst: " << *Inst;
+ dbgs() << " Base=" << *Base;
+ dbgs() << " Imm=" << *Imm;
+ dbgs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP,
@@ -407,7 +367,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
// value of NewBase in the case that it's a diffferent instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<WeakVH> &DeadInsts,
@@ -428,7 +388,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
// artificially make the preinc value live out the bottom of the loop.
- if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
+ if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
++InsertPt;
@@ -444,9 +404,9 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
- DEBUG(errs() << " Replacing with ");
- DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false));
- DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+ DEBUG(dbgs() << " Replacing with ");
+ DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
+ DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
<< *Imm << "\n");
return;
}
@@ -469,7 +429,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
BasicBlock *PHIPred = PN->getIncomingBlock(i);
- if (L->contains(OldLoc->getParent())) {
+ if (L->contains(OldLoc)) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
@@ -486,7 +446,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
- if (L->contains(PHIPred) && !L->contains(PN->getParent()))
+ if (L->contains(PHIPred) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting the edge can reduce the number of PHI entries we have.
@@ -498,15 +458,15 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
Value *&Code = InsertedCode[PHIPred];
if (!Code) {
// Insert the code into the end of the predecessor block.
- Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
+ Instruction *InsertPt = (L->contains(OldLoc)) ?
PHIPred->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
Rewriter, InsertPt, SE);
- DEBUG(errs() << " Changing PHI use to ");
- DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
- DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+ DEBUG(dbgs() << " Changing PHI use to ");
+ DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false));
+ DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
<< *Imm << "\n");
}
@@ -523,7 +483,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
+static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy,
const TargetLowering *TLI, bool HasBaseReg) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
int64_t VC = SC->getValue()->getSExtValue();
@@ -737,7 +697,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// it is clearly shared across all the IV's. If the use is outside the loop
// (which means after it) we don't want to factor anything *into* the loop,
// so just use 0 as the base.
- if (L->contains(Uses[0].Inst->getParent()))
+ if (L->contains(Uses[0].Inst))
std::swap(Result, Uses[0].Base);
return Result;
}
@@ -762,7 +722,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// after the loop to affect base computation of values *inside* the loop,
// because we can always add their offsets to the result IV after the loop
// is done, ensuring we get good code inside the loop.
- if (!L->contains(Uses[i].Inst->getParent()))
+ if (!L->contains(Uses[i].Inst))
continue;
NumUsesInsideLoop++;
@@ -818,7 +778,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// and a Result in the same instruction (for example because it would
// require too many registers). Check this.
for (unsigned i=0; i<NumUses; ++i) {
- if (!L->contains(Uses[i].Inst->getParent()))
+ if (!L->contains(Uses[i].Inst))
continue;
// We know this is an addressing mode use; if there are any uses that
// are not, FreeResult would be Zero.
@@ -854,7 +814,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// the final IV value coming into those uses does. Instead of trying to
// remove the pieces of the common base, which might not be there,
// subtract off the base to compensate for this.
- if (!L->contains(Uses[i].Inst->getParent())) {
+ if (!L->contains(Uses[i].Inst)) {
Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
continue;
}
@@ -975,7 +935,7 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
@@ -1088,7 +1048,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
-static bool isNonConstantNegative(const SCEV *const &Expr) {
+static bool isNonConstantNegative(const SCEV *Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
@@ -1105,7 +1065,7 @@ static bool isNonConstantNegative(const SCEV *const &Expr) {
/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
-const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
+const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
@@ -1149,7 +1109,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
- if (!L->contains(UsersToProcess[i].Inst->getParent())) {
+ if (!L->contains(UsersToProcess[i].Inst)) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
@@ -1361,7 +1321,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
- DEBUG(errs() << " Fully reducing all users\n");
+ DEBUG(dbgs() << " Fully reducing all users\n");
// Rewrite the UsersToProcess records, creating a separate PHI for each
// unique Base value.
@@ -1393,7 +1353,7 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
const Loop *L) {
if (UsersToProcess.size() == 1 &&
UsersToProcess[0].isUseOfPostIncrementedValue &&
- L->contains(UsersToProcess[0].Inst->getParent()))
+ L->contains(UsersToProcess[0].Inst))
return UsersToProcess[0].Inst;
return L->getLoopLatch()->getTerminator();
}
@@ -1410,7 +1370,7 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
- DEBUG(errs() << " Inserting new PHI:\n");
+ DEBUG(dbgs() << " Inserting new PHI:\n");
PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
Stride, IVIncInsertPt, L,
@@ -1423,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = Phi;
- DEBUG(errs() << " IV=");
- DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false));
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << " IV=");
+ DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false));
+ DEBUG(dbgs() << "\n");
}
/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
@@ -1438,7 +1398,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt) {
- DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE "
+ DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE "
<< *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
// All the users will share the reused IV.
@@ -1482,7 +1442,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
void
-LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
@@ -1547,7 +1507,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
UsersToProcess, TLI);
if (DoSink) {
- DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n");
+ DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n");
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
CommonExprs = NewCommon;
@@ -1559,7 +1519,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
// Now that we know what we need to do, insert the PHI node itself.
//
- DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
+ DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << ":\n"
<< " Common base: " << *CommonExprs << "\n");
@@ -1623,10 +1583,10 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
if (!Base->isZero()) {
BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
- DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":");
+ DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":");
if (BaseV->hasName())
- DEBUG(errs() << " Result value name = %" << BaseV->getName());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << " Result value name = %" << BaseV->getName());
+ DEBUG(dbgs() << "\n");
// If BaseV is a non-zero constant, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We
@@ -1647,15 +1607,15 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
// FIXME: Use emitted users to emit other users.
BasedUser &User = UsersToProcess.back();
- DEBUG(errs() << " Examining ");
+ DEBUG(dbgs() << " Examining ");
if (User.isUseOfPostIncrementedValue)
- DEBUG(errs() << "postinc");
+ DEBUG(dbgs() << "postinc");
else
- DEBUG(errs() << "preinc");
- DEBUG(errs() << " use ");
- DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace,
+ DEBUG(dbgs() << "preinc");
+ DEBUG(dbgs() << " use ");
+ DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace,
/*PrintType=*/false));
- DEBUG(errs() << " in Inst: " << *User.Inst);
+ DEBUG(dbgs() << " in Inst: " << *User.Inst);
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
@@ -1666,7 +1626,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
// loop to ensure it is dominated by the increment. In case it's the
// only use of the iv, the increment instruction is already before the
// use.
- if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+ if (L->contains(User.Inst) && User.Inst != IVIncInsertPt)
User.Inst->moveBefore(IVIncInsertPt);
}
@@ -1728,7 +1688,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!CommonExprs->isZero()) {
- if (L->contains(User.Inst->getParent()))
+ if (L->contains(User.Inst))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
@@ -1815,7 +1775,7 @@ namespace {
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
- bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) {
+ bool operator()(const SCEV *LHS, const SCEV *RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
@@ -2069,8 +2029,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
- DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
- DEBUG(errs() << " to " << *Cond << '\n');
+ DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(dbgs() << " to " << *Cond << '\n');
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
@@ -2403,7 +2363,7 @@ static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
ScalarEvolution *SE, Loop *L,
const TargetLowering *TLI = 0) {
- if (!L->contains(Cond->getParent()))
+ if (!L->contains(Cond))
return false;
if (!isa<SCEVConstant>(CondUse->getOffset()))
@@ -2529,7 +2489,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
if (!UsePostInc)
continue;
- DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n');
// It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2608,9 +2568,9 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
}
// Replace the increment with a decrement.
- DEBUG(errs() << "LSR: Examining use ");
- DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
- DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ DEBUG(dbgs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false));
+ DEBUG(dbgs() << " in Inst: " << *Cond << '\n');
BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
Incr->replaceAllUsesWith(Decr);
@@ -2624,7 +2584,7 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
Value *StartVal = PHIExpr->getIncomingValue(InBlock);
Value *EndVal = Cond->getOperand(1);
- DEBUG(errs() << " Optimize loop counting iv to count down ["
+ DEBUG(dbgs() << " Optimize loop counting iv to count down ["
<< *EndVal << " .. " << *StartVal << "]\n");
// FIXME: check for case where both are constant.
@@ -2633,7 +2593,7 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
EndVal, StartVal, "tmp", PreInsertPt);
PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
- DEBUG(errs() << " New icmp: " << *Cond << "\n");
+ DEBUG(dbgs() << " New icmp: " << *Cond << "\n");
int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
const SCEV *NewStride = 0;
@@ -2716,9 +2676,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
if (!IU->IVUsesByStride.empty()) {
- DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
+ DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
- L->dump());
+ L->print(dbgs()));
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
@@ -2758,8 +2718,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IVsByStride.clear();
// Clean up after ourselves
- if (!DeadInsts.empty())
- DeleteTriviallyDeadInstructions();
+ DeleteTriviallyDeadInstructions();
}
// At this point, it is worth checking to see if any recurrence PHIs are also
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b7adfdc..0c19133 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -877,7 +877,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
for (unsigned i = 0, e = Users.size(); i != e; ++i)
if (Instruction *U = cast<Instruction>(Users[i])) {
- if (!L->contains(U->getParent()))
+ if (!L->contains(U))
continue;
U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
@@ -888,7 +888,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// can. This case occurs when we unswitch switch statements.
for (unsigned i = 0, e = Users.size(); i != e; ++i)
if (Instruction *U = cast<Instruction>(Users[i])) {
- if (!L->contains(U->getParent()))
+ if (!L->contains(U))
continue;
Worklist.push_back(U);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 8466918..827b47d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
-// to promote better constant propagation, GCSE, LICM, PRE...
+// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
@@ -35,8 +35,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
#include <algorithm>
-#include <map>
using namespace llvm;
STATISTIC(NumLinear , "Number of insts linearized");
@@ -58,21 +58,22 @@ namespace {
#ifndef NDEBUG
/// PrintOps - Print out the expression identified in the Ops list.
///
-static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
+static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getParent()->getParent()->getParent();
errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
- << *Ops[0].Op->getType();
+ << *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- WriteAsOperand(errs() << " ", Ops[i].Op, false, M);
- errs() << "," << Ops[i].Rank;
+ errs() << "[ ";
+ WriteAsOperand(errs(), Ops[i].Op, false, M);
+ errs() << ", #" << Ops[i].Rank << "] ";
}
}
#endif
namespace {
class Reassociate : public FunctionPass {
- std::map<BasicBlock*, unsigned> RankMap;
- std::map<AssertingVH<>, unsigned> ValueRankMap;
+ DenseMap<BasicBlock*, unsigned> RankMap;
+ DenseMap<AssertingVH<>, unsigned> ValueRankMap;
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
@@ -86,11 +87,13 @@ namespace {
private:
void BuildRankMap(Function &F);
unsigned getRank(Value *V);
- void ReassociateExpression(BinaryOperator *I);
- void RewriteExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops,
+ Value *ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
unsigned Idx = 0);
- Value *OptimizeExpression(BinaryOperator *I, std::vector<ValueEntry> &Ops);
- void LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops);
+ Value *OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
void LinearizeExpr(BinaryOperator *I);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
void ReassociateBB(BasicBlock *BB);
@@ -107,10 +110,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
void Reassociate::RemoveDeadBinaryOp(Value *V) {
Instruction *Op = dyn_cast<Instruction>(V);
- if (!Op || !isa<BinaryOperator>(Op) || !isa<CmpInst>(Op) || !Op->use_empty())
+ if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty())
return;
Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+
+ ValueRankMap.erase(Op);
+ Op->eraseFromParent();
RemoveDeadBinaryOp(LHS);
RemoveDeadBinaryOp(RHS);
}
@@ -156,13 +162,14 @@ void Reassociate::BuildRankMap(Function &F) {
}
unsigned Reassociate::getRank(Value *V) {
- if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument...
-
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return 0; // Otherwise it's a global or constant, rank 0.
+ if (I == 0) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
+ return 0; // Otherwise it's a global or constant, rank 0.
+ }
- unsigned &CachedRank = ValueRankMap[I];
- if (CachedRank) return CachedRank; // Rank already known?
+ if (unsigned Rank = ValueRankMap[I])
+ return Rank; // Rank already known?
// If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
// we can reassociate expressions for code motion! Since we do not recurse
@@ -182,7 +189,7 @@ unsigned Reassociate::getRank(Value *V) {
//DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
// << Rank << "\n");
- return CachedRank = Rank;
+ return ValueRankMap[I] = Rank;
}
/// isReassociableOp - Return true if V is an instruction of the specified
@@ -197,7 +204,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- std::map<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
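A small sketch of the identity LowerNegateToMultiply relies on, written as a hypothetical helper (unsigned arithmetic keeps the wraparound well defined):
// 0 - X and X multiplied by the all-ones value compute the same result,
// but the multiply form participates in multiply reassociation.
unsigned NegateAsMultiply(unsigned X) {
  unsigned AsSub = 0u - X;
  unsigned AsMul = X * 0xFFFFFFFFu;   // multiply by -1 (all ones)
  return AsSub == AsMul;              // always 1
}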
@@ -250,7 +257,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
/// caller MUST use something like RewriteExprTree to put the values back in.
///
void Reassociate::LinearizeExprTree(BinaryOperator *I,
- std::vector<ValueEntry> &Ops) {
+ SmallVectorImpl<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
@@ -282,15 +289,15 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
I->setOperand(0, UndefValue::get(I->getType()));
I->setOperand(1, UndefValue::get(I->getType()));
return;
- } else {
- // Turn X+(Y+Z) -> (Y+Z)+X
- std::swap(LHSBO, RHSBO);
- std::swap(LHS, RHS);
- bool Success = !I->swapOperands();
- assert(Success && "swapOperands failed");
- Success = false;
- MadeChange = true;
}
+
+ // Turn X+(Y+Z) -> (Y+Z)+X
+ std::swap(LHSBO, RHSBO);
+ std::swap(LHS, RHS);
+ bool Success = !I->swapOperands();
+ assert(Success && "swapOperands failed");
+ Success = false;
+ MadeChange = true;
} else if (RHSBO) {
// Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees that the RHS is not
// part of the expression tree.
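A minimal sketch of the left-leaning shape this canonicalization produces (hypothetical operands, not from the patch):
// (A + B) + (C + D) becomes (((A + B) + C) + D): every right-hand operand
// is a leaf, so the whole expression hangs off the left-hand spine.
int Linearized(int A, int B, int C, int D) {
  return ((A + B) + C) + D;
}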
@@ -322,7 +329,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
// linearized and optimized, emit them in-order. This function is written to be
// tail recursive.
void Reassociate::RewriteExprTree(BinaryOperator *I,
- std::vector<ValueEntry> &Ops,
+ SmallVectorImpl<ValueEntry> &Ops,
unsigned i) {
if (i+2 == Ops.size()) {
if (I->getOperand(0) != Ops[i].Op ||
@@ -369,6 +376,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// that should be processed next by the reassociation pass.
//
static Value *NegateValue(Value *V, Instruction *BI) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getNeg(C);
+
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -376,7 +386,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
// so that later, a Y = 12+X could get reassociated with the -12 to eliminate
// the constants. We assume that instcombine will clean up the mess later if
- // we introduce tons of unnecessary negation instructions...
+ // we introduce tons of unnecessary negation instructions.
//
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
@@ -393,10 +403,36 @@ static Value *NegateValue(Value *V, Instruction *BI) {
I->setName(I->getName()+".neg");
return I;
}
+
+ // Okay, we need to materialize a negated version of V with an instruction.
+ // Scan the use lists of V to see if we have one already.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ if (!BinaryOperator::isNeg(*UI)) continue;
+
+ // We found one! Now we have to make sure that the definition dominates
+ // this use. We do this by moving it to the entry block (if it is a
+ // non-instruction value) or right after the definition. These negates will
+ // be zapped by reassociate later, so we don't need much finesse here.
+ BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+
+ BasicBlock::iterator InsertPt;
+ if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
+ InsertPt = II->getNormalDest()->begin();
+ } else {
+ InsertPt = InstInput;
+ ++InsertPt;
+ }
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ } else {
+ InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
+ }
+ TheNeg->moveBefore(InsertPt);
+ return TheNeg;
+ }
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- //
return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
}
@@ -427,12 +463,12 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
- std::map<AssertingVH<>, unsigned> &ValueRankMap) {
- // Convert a subtract into an add and a neg instruction... so that sub
- // instructions can be commuted with other add instructions...
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // Convert a subtract into an add and a neg instruction. This allows sub
+ // instructions to be commuted with other add instructions.
//
- // Calculate the negative value of Operand 1 of the sub instruction...
- // and set it as the RHS of the add instruction we just made...
+ // Calculate the negative value of Operand 1 of the sub instruction,
+ // and set it as the RHS of the add instruction we just made.
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
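A sketch of the rewrite BreakUpSubtract performs, as a hypothetical helper:
// X - Y is split into X + (0 - Y); the resulting add can be commuted and
// merged with neighbouring adds.
unsigned BreakUpSubtractExample(unsigned X, unsigned Y) {
  unsigned AsSub = X - Y;
  unsigned AsAdd = X + (0u - Y);
  return AsSub == AsAdd;   // always 1
}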
@@ -452,7 +488,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
- std::map<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
@@ -460,11 +496,10 @@ static Instruction *ConvertShiftToMul(Instruction *Shl,
(isReassociableOp(Shl->use_back(), Instruction::Mul) ||
isReassociableOp(Shl->use_back(), Instruction::Add)))) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
- MulCst =
- ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
- Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
- "", Shl);
+ Instruction *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
ValueRankMap.erase(Shl);
Mul->takeName(Shl);
Shl->replaceAllUsesWith(Mul);
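A sketch of the equivalence ConvertShiftToMul uses (hypothetical helper):
// X << C equals X * (1 << C); the multiply form can mix with other
// reassociable multiplies and adds.
unsigned ShiftAsMultiply(unsigned X) {
  unsigned Shifted    = X << 3;
  unsigned Multiplied = X * (1u << 3);   // X * 8
  return Shifted == Multiplied;          // always 1
}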
@@ -475,15 +510,16 @@ static Instruction *ConvertShiftToMul(Instruction *Shl,
}
// Scan backwards and forwards among values with the same rank as element i to
-// see if X exists. If X does not exist, return i.
-static unsigned FindInOperandList(std::vector<ValueEntry> &Ops, unsigned i,
+// see if X exists. If X does not exist, return i. This is useful when
+// scanning for 'x' when we see '-x' because they both get the same rank.
+static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
if (Ops[j].Op == X)
return j;
- // Scan backwards
+ // Scan backwards.
for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
if (Ops[j].Op == X)
return j;
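A short worked example of why this rank-bounded scan suffices (hypothetical operands):
// For "A + B + (-B)", B and -B are assigned the same rank, so when the -B
// entry is visited, scanning only the neighbouring entries with that rank
// (forward, then backward) is enough to find the matching B.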
@@ -492,7 +528,7 @@ static unsigned FindInOperandList(std::vector<ValueEntry> &Ops, unsigned i,
/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
-static Value *EmitAddTreeOfValues(Instruction *I, std::vector<Value*> &Ops) {
+static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.back();
@@ -508,32 +544,57 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
if (!BO) return 0;
- std::vector<ValueEntry> Factors;
+ SmallVector<ValueEntry, 8> Factors;
LinearizeExprTree(BO, Factors);
bool FoundFactor = false;
- for (unsigned i = 0, e = Factors.size(); i != e; ++i)
+ bool NeedsNegate = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
if (Factors[i].Op == Factor) {
FoundFactor = true;
Factors.erase(Factors.begin()+i);
break;
}
+
+ // If this is a negative version of this factor, remove it.
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
+ if (FC1->getValue() == -FC2->getValue()) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ }
+
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
return 0;
}
- if (Factors.size() == 1) return Factors[0].Op;
+ BasicBlock::iterator InsertPt = BO; ++InsertPt;
+
+ // If this was just a single multiply, remove the multiply and return the only
+ // remaining operand.
+ if (Factors.size() == 1) {
+ ValueRankMap.erase(BO);
+ BO->eraseFromParent();
+ V = Factors[0].Op;
+ } else {
+ RewriteExprTree(BO, Factors);
+ V = BO;
+ }
- RewriteExprTree(BO, Factors);
- return BO;
+ if (NeedsNegate)
+ V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+
+ return V;
}
/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
/// add its operands as factors, otherwise add V to the list of factors.
static void FindSingleUseMultiplyFactors(Value *V,
- std::vector<Value*> &Factors) {
+ SmallVectorImpl<Value*> &Factors) {
BinaryOperator *BO;
if ((!V->hasOneUse() && !V->use_empty()) ||
!(BO = dyn_cast<BinaryOperator>(V)) ||
@@ -547,10 +608,228 @@ static void FindSingleUseMultiplyFactors(Value *V,
FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
+/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
+/// instruction. This optimizes based on identities. If it can be reduced to
+/// a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+static Value *OptimizeAndOrXor(unsigned Opcode,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) // ...&X&~X = 0
+ return Constant::getNullValue(X->getType());
+
+ if (Opcode == Instruction::Or) // ...|X|~X = -1
+ return Constant::getAllOnesValue(X->getType());
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values for And and Or.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ ++NumAnnihil;
+ continue;
+ }
+
+ // Drop pairs of values for Xor.
+ assert(Opcode == Instruction::Xor);
+ if (e == 2)
+ return Constant::getNullValue(Ops[0].Op->getType());
+
+ // Y ^ X^X -> Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ ++NumAnnihil;
+ }
+ }
+ return 0;
+}
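// Hypothetical sketch (not part of this patch) of the identities exploited
// by OptimizeAndOrXor above, in plain C++:
static unsigned AndOrXorIdentities(unsigned X, unsigned Y) {
  unsigned A = X & ~X;        // always 0
  unsigned O = X | ~X;        // always all ones
  unsigned D = (Y & X) & X;   // duplicate operand dropped: Y & X
  unsigned Z = (Y ^ X) ^ X;   // X ^ X cancels, leaving Y
  return A + O + D + Z;
}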
+/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// optimizes based on identities. If it can be reduced to a single Value, it
+/// is returned, otherwise the Ops list is mutated as necessary.
+Value *Reassociate::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
+ //
+ // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
+ //
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *TheOp = Ops[i].Op;
+ // Check to see if we've seen this operand before. If so, we factor all
+ // instances of the operand together. Due to our sorting criteria, we know
+ // that these need to be next to each other in the vector.
+ if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
+ // Rescan the list, remove all instances of this operand from the expr.
+ unsigned NumFound = 0;
+ do {
+ Ops.erase(Ops.begin()+i);
+ ++NumFound;
+ } while (i != Ops.size() && Ops[i].Op == TheOp);
+
+ DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
+ ++NumFactor;
+
+ // Insert a new multiply.
+ Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
+ Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+
+ // Now that we have inserted a multiply, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
+ Mul = ReassociateExpression(cast<BinaryOperator>(Mul));
+
+ // If every add operand was a duplicate, return the multiply.
+ if (Ops.empty())
+ return Mul;
+
+ // Otherwise, we had some input that didn't have the dupe, such as
+ // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
+
+ --i;
+ e = Ops.size();
+ continue;
+ }
+
+ // Check for X and -X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp))
+ continue;
+
+ Value *X = BinaryOperator::getNegArgument(TheOp);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX == i)
+ continue;
+
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2)
+ return Constant::getNullValue(X->getType());
+
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ DenseMap<Value*, unsigned> FactorOccurrences;
+
+ // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
+ // where they are actually the same multiply.
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ // Compute all of the factors of this added value.
+ SmallVector<Value*, 8> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ SmallPtrSet<Value*, 8> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ Value *Factor = Factors[i];
+ if (!Duplicates.insert(Factor)) continue;
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+
+ // If Factor is a negative constant, add the negated value as a factor
+ // because we can percolate the negate out. Watch for minint, which
+ // cannot be positivified.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+ Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
+ ++NumFactor;
+
+ // Create a new instruction that uses the MaxOccVal twice. If we don't do
+ // this, we could otherwise run into situations where removing a factor
+ // from an expression will drop a use of maxocc, and this can cause
+ // RemoveFactorFromExpression on successive values to behave differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ SmallVector<Value*, 4> NewMulOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+
+ // Now that we have inserted the add tree, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ V = ReassociateExpression(cast<BinaryOperator>(V));
+
+ // Create the multiply.
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Rerun associate on the multiply in case the inner expression turned into
+ // a multiply. We want to make sure that we keep things in canonical form.
+ V2 = ReassociateExpression(cast<BinaryOperator>(V2));
+
+ // If every add operand included the factor (e.g. "A*B + A*C"), then the
+ // entire result expression is just the multiply "A*(B+C)".
+ if (Ops.empty())
+ return V2;
+
+ // Otherwise, we had some input that didn't have the factor, such as
+ // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+ }
+
+ return 0;
+}
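// Hypothetical sketch (not part of this patch) of the two add rewrites
// performed by OptimizeAdd above:
static int OptimizeAddExamples(int A, int B, int C, int D, int Y, int Z) {
  int Repeated = Y + Y + Y + Z;     // canonicalized to 3*Y + Z
  int Factored = A*A + A*B*C + D;   // factored to A*(A + B*C) + D
  return Repeated + Factored;
}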
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
- std::vector<ValueEntry> &Ops) {
+ SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
bool IterateOptimization = false;
@@ -570,198 +849,53 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
switch (Opcode) {
default: break;
case Instruction::And:
- if (CstVal->isZero()) { // ... & 0 -> 0
- ++NumAnnihil;
+ if (CstVal->isZero()) // X & 0 -> 0
return CstVal;
- } else if (CstVal->isAllOnesValue()) { // ... & -1 -> ...
+ if (CstVal->isAllOnesValue()) // X & -1 -> X
Ops.pop_back();
- }
break;
case Instruction::Mul:
- if (CstVal->isZero()) { // ... * 0 -> 0
+ if (CstVal->isZero()) { // X * 0 -> 0
++NumAnnihil;
return CstVal;
- } else if (cast<ConstantInt>(CstVal)->isOne()) {
- Ops.pop_back(); // ... * 1 -> ...
}
+
+ if (cast<ConstantInt>(CstVal)->isOne())
+ Ops.pop_back(); // X * 1 -> X
break;
case Instruction::Or:
- if (CstVal->isAllOnesValue()) { // ... | -1 -> -1
- ++NumAnnihil;
+ if (CstVal->isAllOnesValue()) // X | -1 -> -1
return CstVal;
- }
// FALLTHROUGH!
case Instruction::Add:
case Instruction::Xor:
- if (CstVal->isZero()) // ... [|^+] 0 -> ...
+ if (CstVal->isZero()) // X [|^+] 0 -> X
Ops.pop_back();
break;
}
if (Ops.size() == 1) return Ops[0].Op;
- // Handle destructive annihilation do to identities between elements in the
+ // Handle destructive annihilation due to identities between elements in the
// argument list here.
switch (Opcode) {
default: break;
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
- // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
- // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- // First, check for X and ~X in the operand list.
- assert(i < Ops.size());
- if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
- Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
- unsigned FoundX = FindInOperandList(Ops, i, X);
- if (FoundX != i) {
- if (Opcode == Instruction::And) { // ...&X&~X = 0
- ++NumAnnihil;
- return Constant::getNullValue(X->getType());
- } else if (Opcode == Instruction::Or) { // ...|X|~X = -1
- ++NumAnnihil;
- return Constant::getAllOnesValue(X->getType());
- }
- }
- }
-
- // Next, check for duplicate pairs of values, which we assume are next to
- // each other, due to our sorting criteria.
- assert(i < Ops.size());
- if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
- if (Opcode == Instruction::And || Opcode == Instruction::Or) {
- // Drop duplicate values.
- Ops.erase(Ops.begin()+i);
- --i; --e;
- IterateOptimization = true;
- ++NumAnnihil;
- } else {
- assert(Opcode == Instruction::Xor);
- if (e == 2) {
- ++NumAnnihil;
- return Constant::getNullValue(Ops[0].Op->getType());
- }
- // ... X^X -> ...
- Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
- i -= 1; e -= 2;
- IterateOptimization = true;
- ++NumAnnihil;
- }
- }
- }
+ case Instruction::Xor: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
break;
+ }
- case Instruction::Add:
- // Scan the operand lists looking for X and -X pairs. If we find any, we
- // can simplify the expression. X+-X == 0.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- assert(i < Ops.size());
- // Check for X and -X in the operand list.
- if (BinaryOperator::isNeg(Ops[i].Op)) {
- Value *X = BinaryOperator::getNegArgument(Ops[i].Op);
- unsigned FoundX = FindInOperandList(Ops, i, X);
- if (FoundX != i) {
- // Remove X and -X from the operand list.
- if (Ops.size() == 2) {
- ++NumAnnihil;
- return Constant::getNullValue(X->getType());
- } else {
- Ops.erase(Ops.begin()+i);
- if (i < FoundX)
- --FoundX;
- else
- --i; // Need to back up an extra one.
- Ops.erase(Ops.begin()+FoundX);
- IterateOptimization = true;
- ++NumAnnihil;
- --i; // Revisit element.
- e -= 2; // Removed two elements.
- }
- }
- }
- }
-
-
- // Scan the operand list, checking to see if there are any common factors
- // between operands. Consider something like A*A+A*B*C+D. We would like to
- // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
- // To efficiently find this, we count the number of times a factor occurs
- // for any ADD operands that are MULs.
- std::map<Value*, unsigned> FactorOccurrences;
- unsigned MaxOcc = 0;
- Value *MaxOccVal = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op)) {
- if (BOp->getOpcode() == Instruction::Mul && BOp->use_empty()) {
- // Compute all of the factors of this added value.
- std::vector<Value*> Factors;
- FindSingleUseMultiplyFactors(BOp, Factors);
- assert(Factors.size() > 1 && "Bad linearize!");
-
- // Add one to FactorOccurrences for each unique factor in this op.
- if (Factors.size() == 2) {
- unsigned Occ = ++FactorOccurrences[Factors[0]];
- if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[0]; }
- if (Factors[0] != Factors[1]) { // Don't double count A*A.
- Occ = ++FactorOccurrences[Factors[1]];
- if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[1]; }
- }
- } else {
- std::set<Value*> Duplicates;
- for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
- if (Duplicates.insert(Factors[i]).second) {
- unsigned Occ = ++FactorOccurrences[Factors[i]];
- if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[i]; }
- }
- }
- }
- }
- }
- }
-
- // If any factor occurred more than one time, we can pull it out.
- if (MaxOcc > 1) {
- DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n");
-
- // Create a new instruction that uses the MaxOccVal twice. If we don't do
- // this, we could otherwise run into situations where removing a factor
- // from an expression will drop a use of maxocc, and this can cause
- // RemoveFactorFromExpression on successive values to behave differently.
- Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
- std::vector<Value*> NewMulOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
- NewMulOps.push_back(V);
- Ops.erase(Ops.begin()+i);
- --i; --e;
- }
- }
-
- // No need for extra uses anymore.
- delete DummyInst;
-
- unsigned NumAddedValues = NewMulOps.size();
- Value *V = EmitAddTreeOfValues(I, NewMulOps);
- Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
-
- // Now that we have inserted V and its sole use, optimize it. This allows
- // us to handle cases that require multiple factoring steps, such as this:
- // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
- if (NumAddedValues > 1)
- ReassociateExpression(cast<BinaryOperator>(V));
-
- ++NumFactor;
-
- if (Ops.empty())
- return V2;
+ case Instruction::Add: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAdd(I, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ }
- // Add the new value to the list of things being added.
- Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
-
- // Rewrite the tree so that there is now a use of V.
- RewriteExprTree(I, Ops);
- return OptimizeExpression(I, Ops);
- }
break;
//case Instruction::Mul:
}
@@ -826,13 +960,14 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
}
}
-void Reassociate::ReassociateExpression(BinaryOperator *I) {
+Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
- // First, walk the expression tree, linearizing the tree, collecting
- std::vector<ValueEntry> Ops;
+ // First, walk the expression tree, linearizing the tree, collecting the
+ // operand information.
+ SmallVector<ValueEntry, 8> Ops;
LinearizeExprTree(I, Ops);
- DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n");
+ DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << '\n');
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
@@ -847,10 +982,11 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
if (Value *V = OptimizeExpression(I, Ops)) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
- DEBUG(errs() << "Reassoc to scalar: " << *V << "\n");
+ DEBUG(errs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
RemoveDeadBinaryOp(I);
- return;
+ ++NumAnnihil;
+ return V;
}
// We want to sink immediates as deeply as possible except in the case where
@@ -861,22 +997,24 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
- Ops.insert(Ops.begin(), Ops.back());
- Ops.pop_back();
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
}
- DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n");
+ DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << '\n');
if (Ops.size() == 1) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
RemoveDeadBinaryOp(I);
- } else {
- // Now that we ordered and optimized the expressions, splat them back into
- // the expression tree, removing any unneeded nodes.
- RewriteExprTree(I, Ops);
+ return Ops[0].Op;
}
+
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+ return I;
}
@@ -888,7 +1026,7 @@ bool Reassociate::runOnFunction(Function &F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
ReassociateBB(FI);
- // We are done with the rank map...
+ // We are done with the rank map.
RankMap.clear();
ValueRankMap.clear();
return MadeChange;
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index dbc82e1..f91fbda 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include <cstdio>
using namespace llvm;
STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN");
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index b040a27..79bb7c5 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -74,6 +74,10 @@ namespace {
private:
TargetData *TD;
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
/// AllocaInfo - When analyzing uses of an alloca instruction, this captures
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
@@ -102,25 +106,29 @@ namespace {
int isSafeAllocaToScalarRepl(AllocaInst *AI);
- void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
- AllocaInfo &Info);
- void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
- AllocaInfo &Info);
- void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
- unsigned OpNo, AllocaInfo &Info);
- void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
- AllocaInfo &Info);
+ void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
+ AllocaInfo &Info);
+ void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
- void CleanupGEP(GetElementPtrInst *GEP);
- void CleanupAllocaUsers(AllocaInst *AI);
+ void DeleteDeadInstructions();
+ void CleanupAllocaUsers(Value *V);
AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
- void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts);
-
- void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
@@ -360,399 +368,350 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
}
}
- // Now that we have created the alloca instructions that we want to use,
- // expand the getelementptr instructions to use them.
- while (!AI->use_empty()) {
- Instruction *User = cast<Instruction>(AI->use_back());
- if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
- RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
- BCInst->eraseFromParent();
- continue;
- }
-
- // Replace:
- // %res = load { i32, i32 }* %alloc
- // with:
- // %load.0 = load i32* %alloc.0
- // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
- // %load.1 = load i32* %alloc.1
- // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
- // (Also works for arrays instead of structs)
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- Value *Insert = UndefValue::get(LI->getType());
- for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
- Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
- Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
- }
- LI->replaceAllUsesWith(Insert);
- LI->eraseFromParent();
- continue;
- }
-
- // Replace:
- // store { i32, i32 } %val, { i32, i32 }* %alloc
- // with:
- // %val.0 = extractvalue { i32, i32 } %val, 0
- // store i32 %val.0, i32* %alloc.0
- // %val.1 = extractvalue { i32, i32 } %val, 1
- // store i32 %val.1, i32* %alloc.1
- // (Also works for arrays instead of structs)
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- Value *Val = SI->getOperand(0);
- for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
- Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
- new StoreInst(Extract, ElementAllocas[i], SI);
- }
- SI->eraseFromParent();
- continue;
- }
-
- GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
- // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
- unsigned Idx =
- (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
-
- assert(Idx < ElementAllocas.size() && "Index out of range?");
- AllocaInst *AllocaToUse = ElementAllocas[Idx];
-
- Value *RepValue;
- if (GEPI->getNumOperands() == 3) {
- // Do not insert a new getelementptr instruction with zero indices, only
- // to have it optimized out later.
- RepValue = AllocaToUse;
- } else {
- // We are indexing deeply into the structure, so we still need a
- // getelement ptr instruction to finish the indexing. This may be
- // expanded itself once the worklist is rerun.
- //
- SmallVector<Value*, 8> NewArgs;
- NewArgs.push_back(Constant::getNullValue(
- Type::getInt32Ty(AI->getContext())));
- NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
- RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
- RepValue->takeName(GEPI);
- }
-
- // If this GEP is to the start of the aggregate, check for memcpys.
- if (Idx == 0 && GEPI->hasAllZeroIndices())
- RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
-
- // Move all of the users over to the new GEP.
- GEPI->replaceAllUsesWith(RepValue);
- // Delete the old GEP
- GEPI->eraseFromParent();
- }
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
- // Finally, delete the Alloca instruction
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
AI->eraseFromParent();
+
NumReplaced++;
}
-/// isSafeElementUse - Check to see if this use is an allowed use for a
-/// getelementptr instruction of an array aggregate allocation. isFirstElt
-/// indicates whether Ptr is known to the start of the aggregate.
-void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
- AllocaInfo &Info) {
- for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
- I != E; ++I) {
- Instruction *User = cast<Instruction>(*I);
- switch (User->getOpcode()) {
- case Instruction::Load: break;
- case Instruction::Store:
- // Store is ok if storing INTO the pointer, not storing the pointer
- if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
- break;
- case Instruction::GetElementPtr: {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
- bool AreAllZeroIndices = isFirstElt;
- if (GEP->getNumOperands() > 1 &&
- (!isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
- // Using pointer arithmetic to navigate the array.
- return MarkUnsafe(Info);
-
- // Verify that any array subscripts are in range.
- for (gep_type_iterator GEPIt = gep_type_begin(GEP),
- E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
- // Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*GEPIt))
- continue;
-
- // This GEP indexes an array. Verify that this is an in-range
- // constant integer. Specifically, consider A[0][i]. We cannot know that
- // the user isn't doing invalid things like allowing i to index an
- // out-of-range subscript that accesses A[1]. Because of this, we have
- // to reject SROA of any accesses into structs where any of the
- // components are variables.
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal) return MarkUnsafe(Info);
-
- // Are all indices still zero?
- AreAllZeroIndices &= IdxVal->isZero();
-
- if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
- if (IdxVal->getZExtValue() >= AT->getNumElements())
- return MarkUnsafe(Info);
- } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
- if (IdxVal->getZExtValue() >= VT->getNumElements())
- return MarkUnsafe(Info);
- }
+/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
}
-
- isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
- if (Info.isUnsafe) return;
- break;
- }
- case Instruction::BitCast:
- if (isFirstElt) {
- isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
- if (Info.isUnsafe) return;
- break;
- }
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- return MarkUnsafe(Info);
- case Instruction::Call:
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- if (isFirstElt) {
- isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
- if (Info.isUnsafe) return;
- break;
- }
- }
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- return MarkUnsafe(Info);
- default:
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- return MarkUnsafe(Info);
- }
- }
- return; // All users look ok :)
-}
-/// AllUsersAreLoads - Return true if all users of this value are loads.
-static bool AllUsersAreLoads(Value *Ptr) {
- for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
- I != E; ++I)
- if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
- return false;
- return true;
-}
-
-/// isSafeUseOfAllocation - Check if this user is an allowed use for an
-/// aggregate allocation.
-void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
- AllocaInfo &Info) {
- if (BitCastInst *C = dyn_cast<BitCastInst>(User))
- return isSafeUseOfBitCastedAllocation(C, AI, Info);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User))
- if (!LI->isVolatile())
- return;// Loads (returning a first class aggregrate) are always rewritable
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User))
- if (!SI->isVolatile() && SI->getOperand(0) != AI)
- return;// Store is ok if storing INTO the pointer, not storing the pointer
-
- GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
- if (GEPI == 0)
- return MarkUnsafe(Info);
-
- gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
-
- // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
- if (I == E ||
- I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
- return MarkUnsafe(Info);
+ I->eraseFromParent();
}
+}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter. Offset indicates the position within AI that is
+/// referenced by this instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
- ++I;
- if (I == E) return MarkUnsafe(Info); // ran out of GEP indices??
-
- bool IsAllZeroIndices = true;
-
- // If the first index is a non-constant index into an array, see if we can
- // handle it as a special case.
- if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
- if (!isa<ConstantInt>(I.getOperand())) {
- IsAllZeroIndices = 0;
- uint64_t NumElements = AT->getNumElements();
-
- // If this is an array index and the index is not constant, we cannot
- // promote... that is unless the array has exactly one or two elements in
- // it, in which case we CAN promote it, but we have to canonicalize this
- // out if this is the only problem.
- if ((NumElements == 1 || NumElements == 2) &&
- AllUsersAreLoads(GEPI)) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafeForScalarRepl(BC, AI, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, AI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length)
+ isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 1, Info);
+ else
+ MarkUnsafe(Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (!LI->isVolatile()) {
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info);
+ } else
+ MarkUnsafe(Info);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (!SI->isVolatile() && SI->getOperand(0) != I) {
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info);
+ } else
+ MarkUnsafe(Info);
+ } else if (isa<DbgInfoIntrinsic>(UI)) {
+ // If one user is DbgInfoIntrinsic then check if all users are
+ // DbgInfoIntrinsics.
+ if (OnlyUsedByDbgInfoIntrinsics(I)) {
Info.needsCleanup = true;
- return; // Canonicalization required!
+ return;
}
- return MarkUnsafe(Info);
+ MarkUnsafe(Info);
+ } else {
+ DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
+ MarkUnsafe(Info);
}
+ if (Info.isUnsafe) return;
}
-
+}
+
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
+void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
+
// Walk through the GEP type indices, checking the types that this indexes
// into.
- for (; I != E; ++I) {
+ for (; GEPIt != E; ++GEPIt) {
// Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*I))
+ if (isa<StructType>(*GEPIt))
continue;
-
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
- if (!IdxVal) return MarkUnsafe(Info);
- // Are all indices still zero?
- IsAllZeroIndices &= IdxVal->isZero();
-
- if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
- // This GEP indexes an array. Verify that this is an in-range constant
- // integer. Specifically, consider A[0][i]. We cannot know that the user
- // isn't doing invalid things like allowing i to index an out-of-range
- // subscript that accesses A[1]. Because of this, we have to reject SROA
- // of any accesses into structs where any of the components are variables.
- if (IdxVal->getZExtValue() >= AT->getNumElements())
- return MarkUnsafe(Info);
- } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
- if (IdxVal->getZExtValue() >= VT->getNumElements())
- return MarkUnsafe(Info);
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal)
+ return MarkUnsafe(Info);
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info);
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore,
+ AllocaInfo &Info) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
+ bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
+ // This is safe for MemIntrinsics (where MemOpType is 0), integer types
+ // (which are essentially the same as the MemIntrinsics, especially with
+ // regard to copying padding between elements), or references using the
+ // aggregate type of the alloca.
+ if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
+ if (!UsesAggregateType) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ }
+ return;
}
}
-
- // If there are any non-simple uses of this getelementptr, make sure to reject
- // them.
- return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
+ // Check if the offset/size correspond to a component within the alloca type.
+ const Type *T = AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize))
+ return;
+
+ return MarkUnsafe(Info);
}
-/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
-/// intrinsic can be promoted by SROA. At this point, we know that the operand
-/// of the memintrinsic is a pointer to the beginning of the allocation.
-void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
- unsigned OpNo, AllocaInfo &Info) {
- // If not constant length, give up.
- ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (!Length) return MarkUnsafe(Info);
-
- // If not the whole aggregate, give up.
- if (Length->getZExtValue() !=
- TD->getTypeAllocSize(AI->getType()->getElementType()))
- return MarkUnsafe(Info);
-
- // We only know about memcpy/memset/memmove.
- if (!isa<MemIntrinsic>(MI))
- return MarkUnsafe(Info);
-
- // Otherwise, we can transform it. Determine whether this is a memcpy/set
- // into or out of the aggregate.
- if (OpNo == 1)
- Info.isMemCpyDst = true;
- else {
- assert(OpNo == 2);
- Info.isMemCpySrc = true;
+/// TypeHasComponent - Return true if T has a component type with the
+/// specified offset and size. If Size is zero, do not check the size.
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
+ const Type *EltTy;
+ uint64_t EltSize;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
}
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // Check if the component spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
}
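// Hypothetical worked example for TypeHasComponent on { i32, [4 x i16] }
// (i32 is 4 bytes, i16 is 2 bytes):
//   Offset 0, Size 4 -> the i32 element              -> true
//   Offset 4, Size 2 -> element 0 of the i16 array   -> true
//   Offset 6, Size 2 -> element 1 of the i16 array   -> true
//   Offset 5, Size 2 -> straddles two array elements -> false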
-/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
-/// from an alloca are safe for SROA of that alloca.
-void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
- AllocaInfo &Info) {
- for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
- UI != E; ++UI) {
- if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
- isSafeUseOfBitCastedAllocation(BCU, AI, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
- isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
- if (SI->isVolatile())
- return MarkUnsafe(Info);
-
- // If storing the entire alloca in one chunk through a bitcasted pointer
- // to integer, we can transform it. This happens (for example) when you
- // cast a {i32,i32}* to i64* and store through it. This is similar to the
- // memcpy case and occurs in various "byval" cases and emulated memcpys.
- if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
- TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
- TD->getTypeAllocSize(AI->getType()->getElementType())) {
- Info.isMemCpyDst = true;
- continue;
- }
- return MarkUnsafe(Info);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
- if (LI->isVolatile())
- return MarkUnsafe(Info);
-
- // If loading the entire alloca in one chunk through a bitcasted pointer
- // to integer, we can transform it. This happens (for example) when you
- // cast a {i32,i32}* to i64* and load through it. This is similar to the
- // memcpy case and occurs in various "byval" cases and emulated memcpys.
- if (isa<IntegerType>(LI->getType()) &&
- TD->getTypeAllocSize(LI->getType()) ==
- TD->getTypeAllocSize(AI->getType()->getElementType())) {
- Info.isMemCpySrc = true;
- continue;
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ const Type *LIType = LI->getType();
+ if (LIType == AI->getAllocatedType()) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+ // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = new LoadInst(NewElts[i], "load", LI);
+ Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (isa<IntegerType>(LIType) &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- return MarkUnsafe(Info);
- } else if (isa<DbgInfoIntrinsic>(UI)) {
- // If one user is DbgInfoIntrinsic then check if all users are
- // DbgInfoIntrinsics.
- if (OnlyUsedByDbgInfoIntrinsics(BC)) {
- Info.needsCleanup = true;
- return;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ const Type *SIType = Val->getType();
+ if (SIType == AI->getAllocatedType()) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+ new StoreInst(Extract, NewElts[i], SI);
+ }
+ DeadInsts.push_back(SI);
+ } else if (isa<IntegerType>(SIType) &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
- else
- MarkUnsafe(Info);
}
- else {
- return MarkUnsafe(Info);
- }
- if (Info.isUnsafe) return;
}
}
-/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
-/// to its first element. Transform users of the cast to use the new values
-/// instead.
-void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts) {
- Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
- while (UI != UE) {
- Instruction *User = cast<Instruction>(*UI++);
- if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
- RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
- if (BCU->use_empty()) BCU->eraseFromParent();
- continue;
- }
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- // This must be memcpy/memmove/memset of the entire aggregate.
- // Split into one per element.
- RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // If this is a store of the entire alloca from an integer, rewrite it.
- RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
- continue;
- }
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the first new element alloca.
+ Instruction *Val = NewElts[0];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- // If this is a load of the entire alloca to an integer, rewrite it.
- RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
- continue;
- }
-
- // Otherwise it must be some other user of a gep of the first pointer. Just
- // leave these alone.
- continue;
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within the specified type, which must be either a struct or an array.
+/// Sets T to the type of the element and Offset to the offset within that
+/// element. IdxTy is set to the type of the index result to be used in a
+/// GEP instruction.
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
}
+ const ArrayType *AT = cast<ArrayType>(T);
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
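// Hypothetical worked example for FindElementAndOffset on [10 x i32]
// (4-byte elements) with Offset = 26 on entry:
//   Idx    = 26 / 4 = 6       (returned element index)
//   Offset = 26 - 6*4 = 2     (remaining offset within element 6)
//   IdxTy  = i64              (array indices use 64-bit GEP operands)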
+
+/// RewriteGEP - Check if this GEP instruction moves the pointer across
+/// elements of the alloca that are being split apart, and if so, rewrite
+/// the GEP to be relative to the new element.
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ const Type *T = AI->getAllocatedType();
+ const Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Force the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+  // being split, then it does not need to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
}
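The index-rebuilding loop in RewriteGEP repeatedly peels one level of the aggregate off the remaining byte offset. A small standalone sketch for nested arrays (an illustration only; the pass itself stops as soon as the remaining offset reaches zero and also walks struct layouts via TargetData):

#include <cstdint>
#include <cstdio>
#include <vector>

// Decompose a byte offset within nested arrays into a path of indices,
// mirroring the while-loop above that rebuilds the GEP operands.
static std::vector<uint64_t> offsetToIndices(const std::vector<uint64_t> &EltSizes,
                                             uint64_t Offset) {
  std::vector<uint64_t> Path;
  for (size_t i = 0; i != EltSizes.size(); ++i) {
    Path.push_back(Offset / EltSizes[i]);  // index at this nesting level
    Offset %= EltSizes[i];                 // offset left for the next level
  }
  return Path;
}

int main() {
  // [4 x [3 x i32]]: outer elements are 12 bytes, inner elements 4 bytes.
  std::vector<uint64_t> Sizes;
  Sizes.push_back(12);
  Sizes.push_back(4);
  std::vector<uint64_t> Path = offsetToIndices(Sizes, 28);
  std::printf("[%llu][%llu]\n", (unsigned long long)Path[0],
              (unsigned long long)Path[1]);  // prints "[2][1]"
  return 0;
}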
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
-
// If this is a memcpy/memmove, construct the other pointer as the
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
@@ -761,28 +720,41 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
LLVMContext &Context = MI->getContext();
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
- if (BCInst == MTI->getRawDest())
+ if (Inst == MTI->getRawDest())
OtherPtr = MTI->getRawSource();
else {
- assert(BCInst == MTI->getRawSource());
+ assert(Inst == MTI->getRawSource());
OtherPtr = MTI->getRawDest();
}
}
- // Keep track of the other intrinsic argument, so it can be removed if it
- // is dead when the intrinsic is replaced.
- Value *PossiblyDead = OtherPtr;
-
// If there is an other pointer, we want to convert it to the same pointer
// type as AI has, so we can GEP through it safely.
if (OtherPtr) {
- // It is likely that OtherPtr is a bitcast, if so, remove it.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
- OtherPtr = BC->getOperand(0);
- // All zero GEPs are effectively bitcasts.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
- if (GEP->hasAllZeroIndices())
- OtherPtr = GEP->getOperand(0);
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+  // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ while (1) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
+ OtherPtr = BC->getOperand(0);
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
+ // All zero GEPs are effectively bitcasts.
+ if (GEP->hasAllZeroIndices()) {
+ OtherPtr = GEP->getOperand(0);
+ continue;
+ }
+ }
+ break;
+ }
+ // If OtherPtr has already been rewritten, this intrinsic will be dead.
+ if (OtherPtr == NewElts[0])
+ return;
if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
if (BCE->getOpcode() == Instruction::BitCast)
@@ -798,7 +770,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
// Process each element of the aggregate.
Value *TheFn = MI->getOperand(0);
const Type *BytePtrTy = MI->getRawDest()->getType();
- bool SROADest = MI->getRawDest() == BCInst;
+ bool SROADest = MI->getRawDest() == Inst;
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
@@ -807,12 +779,15 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
Value *OtherElt = 0;
unsigned OtherEltAlign = MemAlignment;
- if (OtherPtr) {
+ if (OtherPtr == AI) {
+ OtherElt = NewElts[i];
+ OtherEltAlign = 0;
+ } else if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
OtherPtr->getNameStr()+"."+Twine(i),
- MI);
+ MI);
uint64_t EltOffset;
const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
if (const StructType *ST =
@@ -924,9 +899,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
}
}
- MI->eraseFromParent();
- if (PossiblyDead)
- RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
+ DeadInsts.push_back(MI);
}
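The net effect of splitting a whole-aggregate memcpy into per-element operations can be pictured at the source level. A hedged analogue (the struct and field names are invented for illustration; the pass of course works on LLVM IR, not C++):

#include <cstring>

struct Pair { int A; double B; };

// Before scalar replacement: one memcpy covering the whole aggregate.
void copyWhole(Pair &Dst, const Pair &Src) {
  std::memcpy(&Dst, &Src, sizeof(Pair));
}

// After RewriteMemIntrinUserOfAlloca: one copy per scalarized element,
// each of which mem2reg can then promote to a register.
void copySplit(Pair &Dst, const Pair &Src) {
  Dst.A = Src.A;
  Dst.B = Src.B;
}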
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
@@ -937,15 +910,9 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
- const Type *AllocaEltTy = AI->getType()->getElementType();
+ const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
- // If this isn't a store of an integer to the whole alloca, it may be a store
- // to the first element. Just ignore the store in this case and normal SROA
- // will handle it.
- if (!isa<IntegerType>(SrcVal->getType()) ||
- TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- return;
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = new ZExtInst(SrcVal,
@@ -1050,7 +1017,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
}
- SI->eraseFromParent();
+ DeadInsts.push_back(SI);
}
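A rough little-endian sketch of what the shifting does when a single 64-bit store covers a scalarized {i32, i32} alloca (the layout is assumed for illustration; the pass also handles big-endian targets and non-integer element types):

#include <cassert>
#include <cstdint>

// Scatter one i64 store across two i32 elements, as
// RewriteStoreUserOfWholeAlloca does for the new element allocas.
static void storeWhole(uint64_t SrcVal, uint32_t &Elt0, uint32_t &Elt1) {
  Elt0 = static_cast<uint32_t>(SrcVal);        // bits 0..31  -> element 0
  Elt1 = static_cast<uint32_t>(SrcVal >> 32);  // bits 32..63 -> element 1
}

int main() {
  uint32_t E0 = 0, E1 = 0;
  storeWhole(0x1122334455667788ULL, E0, E1);
  assert(E0 == 0x55667788u && E1 == 0x11223344u);
  return 0;
}

RewriteLoadUserOfWholeAlloca below performs roughly the inverse: each element is loaded, zero-extended, shifted to its bit offset, and OR'd into the result integer.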
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
@@ -1059,16 +1026,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
- const Type *AllocaEltTy = AI->getType()->getElementType();
+ const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
- // If this isn't a load of the whole alloca to an integer, it may be a load
- // of the first element. Just ignore the load in this case and normal SROA
- // will handle it.
- if (!isa<IntegerType>(LI->getType()) ||
- TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
- return;
-
DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
@@ -1139,10 +1099,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
- LI->eraseFromParent();
+ DeadInsts.push_back(LI);
}
-
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
@@ -1192,14 +1151,10 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// the users are safe to transform.
AllocaInfo Info;
- for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
- I != E; ++I) {
- isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
- if (Info.isUnsafe) {
- DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: "
- << **I << '\n');
- return 0;
- }
+ isSafeForScalarRepl(AI, AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(errs() << "Cannot transform: " << *AI << '\n');
+ return 0;
}
// Okay, we know all the users are promotable. If the aggregate is a memcpy
@@ -1208,88 +1163,28 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getType()->getElementType(), *TD))
+ HasPadding(AI->getAllocatedType(), *TD))
return 0;
// If we require cleanup, return 1, otherwise return 3.
return Info.needsCleanup ? 1 : 3;
}
-/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
-/// is canonicalized here.
-void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
- gep_type_iterator I = gep_type_begin(GEPI);
- ++I;
-
- const ArrayType *AT = dyn_cast<ArrayType>(*I);
- if (!AT)
- return;
-
- uint64_t NumElements = AT->getNumElements();
-
- if (isa<ConstantInt>(I.getOperand()))
- return;
-
- if (NumElements == 1) {
- GEPI->setOperand(2,
- Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
- return;
- }
-
- assert(NumElements == 2 && "Unhandled case!");
- // All users of the GEP must be loads. At each use of the GEP, insert
- // two loads of the appropriate indexed GEP and select between them.
- Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
- Constant::getNullValue(I.getOperand()->getType()),
- "isone");
- // Insert the new GEP instructions, which are properly indexed.
- SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
- Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
- Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
- Indices.begin(),
- Indices.end(),
- GEPI->getName()+".0", GEPI);
- Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
- Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
- Indices.begin(),
- Indices.end(),
- GEPI->getName()+".1", GEPI);
- // Replace all loads of the variable index GEP with loads from both
- // indexes and a select.
- while (!GEPI->use_empty()) {
- LoadInst *LI = cast<LoadInst>(GEPI->use_back());
- Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
- Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI);
- Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
- LI->replaceAllUsesWith(R);
- LI->eraseFromParent();
- }
- GEPI->eraseFromParent();
-}
-
-
/// CleanupAllocaUsers - If SROA reported that it can promote the specified
/// allocation, but only if cleaned up, perform the cleanups required.
-void SROA::CleanupAllocaUsers(AllocaInst *AI) {
- // At this point, we know that the end result will be SROA'd and promoted, so
- // we can insert ugly code if required so long as sroa+mem2reg will clean it
- // up.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+void SROA::CleanupAllocaUsers(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ) {
User *U = *UI++;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
- CleanupGEP(GEPI);
- else {
- Instruction *I = cast<Instruction>(U);
- SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
- if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
- // Safe to remove debug info uses.
- while (!DbgInUses.empty()) {
- DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
- DI->eraseFromParent();
- }
- I->eraseFromParent();
+ Instruction *I = cast<Instruction>(U);
+ SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
+ if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
+ // Safe to remove debug info uses.
+ while (!DbgInUses.empty()) {
+ DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
+ DI->eraseFromParent();
}
+ I->eraseFromParent();
}
}
}
@@ -1395,7 +1290,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
&Indices[0], Indices.size());
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
@@ -1457,7 +1352,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
&Indices[0], Indices.size());
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
GEP->eraseFromParent();
@@ -1478,13 +1373,16 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
- // FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI,
- (NewAI->getName()+".in").str().c_str());
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
continue;
}
@@ -1504,13 +1402,16 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
- // FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI,
- (NewAI->getName()+".in").str().c_str());
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
}
MSI->eraseFromParent();
continue;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index e905952..a36da78 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -189,6 +189,77 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
return true;
}
+/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return blocks, merge them together to promote recursive block merging.
+static bool MergeEmptyReturnBlocks(Function &F) {
+ bool Changed = false;
+
+ BasicBlock *RetBlock = 0;
+
+ // Scan all the blocks in the function, looking for empty return blocks.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+ BasicBlock &BB = *BBI++;
+
+ // Only look at return blocks.
+ ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (Ret == 0) continue;
+
+ // Only look at the block if it is empty or the only other thing in it is a
+ // single PHI node that is the operand to the return.
+ if (Ret != &BB.front()) {
+ // Check for something else in the block.
+ BasicBlock::iterator I = Ret;
+ --I;
+ if (!isa<PHINode>(I) || I != BB.begin() ||
+ Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != I)
+ continue;
+ }
+
+ // If this is the first returning block, remember it and keep going.
+ if (RetBlock == 0) {
+ RetBlock = &BB;
+ continue;
+ }
+
+ // Otherwise, we found a duplicate return block. Merge the two.
+ Changed = true;
+
+  // The case when there is no input to the return or when the returned values
+ // agree is trivial. Note that they can't agree if there are phis in the
+ // blocks.
+ if (Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) ==
+ cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ BB.replaceAllUsesWith(RetBlock);
+ BB.eraseFromParent();
+ continue;
+ }
+
+ // If the canonical return block has no PHI node, create one now.
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+ if (RetBlockPHI == 0) {
+ Value *InVal = cast<ReturnInst>(RetBlock->begin())->getOperand(0);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+ &RetBlock->front());
+
+ for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
+ PI != E; ++PI)
+ RetBlockPHI->addIncoming(InVal, *PI);
+ RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+ }
+
+ // Turn BB into a block that just unconditionally branches to the return
+ // block. This handles the case when the two return blocks have a common
+  // predecessor but return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+ BB.getTerminator()->eraseFromParent();
+ BranchInst::Create(RetBlock, &BB);
+ }
+
+ return Changed;
+}
+
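A rough source-level picture of what MergeEmptyReturnBlocks buys (an assumption about typical codegen for a function like this, not code taken from the commit): each return statement initially lowers to its own return block, and merging them leaves a single return block whose value is a phi, which in turn exposes further block merging:

// Both returns below start out as separate, otherwise-empty return blocks;
// after the transformation they share one block that returns phi {A, B}.
int pick(bool C, int A, int B) {
  if (C)
    return A;
  return B;
}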
/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool IterativeSimplifyCFG(Function &F) {
@@ -216,6 +287,7 @@ static bool IterativeSimplifyCFG(Function &F) {
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+ EverChanged |= MergeEmptyReturnBlocks(F);
EverChanged |= IterativeSimplifyCFG(F);
// If neither pass changed anything, we're done.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6fd884b..3c28ad2 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -76,6 +76,11 @@ public:
/// return value has 'intptr_t' type.
Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
+ /// EmitStrChr - Emit a call to the strchr function to the builder, for the
+ /// specified pointer and character. Ptr is required to be some pointer type,
+ /// and the return value has 'i8*' type.
+ Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B);
+
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
@@ -151,6 +156,26 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
return CI;
}
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI =
+ AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ const Type *I8Ptr = Type::getInt8PtrTy(*Context);
+ const Type *I32Ty = Type::getInt32Ty(*Context);
+ Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
@@ -880,17 +905,16 @@ struct StrLenOpt : public LibCallOptimization {
if (uint64_t Len = GetStringLength(Src))
return ConstantInt::get(CI->getType(), Len-1);
- // Handle strlen(p) != 0.
- if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
-
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
- return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ if (IsOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
}
};
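The reordered strlen fold rests on a simple identity between the call and a load of the first character; a standalone check (illustration only, not code from the pass):

#include <cassert>
#include <cstring>

int main() {
  const char *S = "x";
  // strlen(S) == 0 exactly when the first character is NUL, which is what
  // the pass substitutes when the call only feeds a zero comparison.
  assert((std::strlen(S) == 0) == (*S == '\0'));
  return 0;
}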
//===---------------------------------------===//
-// 'strto*' Optimizations
+// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
@@ -910,6 +934,52 @@ struct StrToOpt : public LibCallOptimization {
}
};
+//===---------------------------------------===//
+// 'strstr' Optimizations
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<PointerType>(FT->getReturnType()))
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getOperand(1) == CI->getOperand(2))
+ return B.CreateBitCast(CI->getOperand(1), CI->getType());
+
+ // See if either input string is a constant string.
+ std::string SearchStr, ToFindStr;
+ bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getOperand(1), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getOperand(1), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1)
+ return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B),
+ CI->getType());
+ return 0;
+ }
+};
+
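The four strstr folds implemented above correspond to identities that can be checked directly against the C library (a standalone check, not code from the pass):

#include <cassert>
#include <cstring>

int main() {
  const char *X = "abcd";
  assert(std::strstr(X, X) == X);                      // strstr(x, x)  -> x
  assert(std::strstr(X, "") == X);                     // strstr(x, "") -> x
  assert(std::strstr(X, "bc") == X + 1);               // both constant -> GEP
  assert(std::strstr(X, "c") == std::strchr(X, 'c'));  // one-char needle -> strchr
  return 0;
}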
//===---------------------------------------===//
// 'memcmp' Optimizations
@@ -941,19 +1011,6 @@ struct MemCmpOpt : public LibCallOptimization {
return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
}
- // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
- // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
- if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
- const Type *PTy = PointerType::getUnqual(Len == 2 ?
- Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
- LHS = B.CreateBitCast(LHS, PTy, "tmp");
- RHS = B.CreateBitCast(RHS, PTy, "tmp");
- LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
- LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
- LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads.
- return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
- }
-
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
std::string LHSStr, RHSStr;
if (GetConstantStringInfo(LHS, LHSStr) &&
@@ -1051,7 +1108,7 @@ struct SizeOpt : public LibCallOptimization {
const Type *Ty = Callee->getFunctionType()->getReturnType();
- if (Const->getZExtValue() < 2)
+ if (Const->getZExtValue() == 0)
return Constant::getAllOnesValue(Ty);
else
return ConstantInt::get(Ty, 0);
@@ -1071,8 +1128,8 @@ struct MemCpyChkOpt : public LibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ !isa<IntegerType>(FT->getParamType(3)) ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
@@ -1099,7 +1156,7 @@ struct MemSetChkOpt : public LibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
!isa<IntegerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
+ !isa<IntegerType>(FT->getParamType(3)) ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1129,7 +1186,7 @@ struct MemMoveChkOpt : public LibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
+ !isa<IntegerType>(FT->getParamType(3)) ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1675,8 +1732,8 @@ namespace {
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
- StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove;
- MemSetOpt MemSet;
+ StrToOpt StrTo; StrStrOpt StrStr;
+ MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
// Integer Optimizations
@@ -1738,6 +1795,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strtoll"] = &StrTo;
Optimizations["strtold"] = &StrTo;
Optimizations["strtoull"] = &StrTo;
+ Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
Optimizations["memcpy"] = &MemCpy;
Optimizations["memmove"] = &MemMove;
@@ -2644,12 +2702,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * strcspn("",a) -> 0
// * strcspn(s,"") -> strlen(a)
//
-// strstr: (PR5783)
-// * strstr(x,x) -> x
-// * strstr(x, "") -> x
-// * strstr(x, "a") -> strchr(x, 'a')
-// * strstr(s1,s2) -> result (if s1 and s2 are constant strings)
-//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
//
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index ccd97c8..19c7206 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -309,10 +309,10 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (TIL == DestLoop) {
// Both in the same loop, the NewBB joins loop.
DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- } else if (TIL->contains(DestLoop->getHeader())) {
+ } else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
TIL->addBasicBlockToLoop(NewBB, LI->getBase());
- } else if (DestLoop->contains(TIL->getHeader())) {
+ } else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
} else {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 162d7b3..c287747 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -21,6 +21,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -347,8 +348,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
Ops.size(), TD);
}
-static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD,
- LLVMContext &Context) {
+static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
DILocation ILoc(InsnMD);
if (ILoc.isNull()) return InsnMD;
@@ -358,14 +358,15 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD,
DILocation OrigLocation = ILoc.getOrigLocation();
MDNode *NewLoc = TheCallMD;
if (!OrigLocation.isNull())
- NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context);
-
- SmallVector<Value *, 4> MDVs;
- MDVs.push_back(InsnMD->getElement(0)); // Line
- MDVs.push_back(InsnMD->getElement(1)); // Col
- MDVs.push_back(InsnMD->getElement(2)); // Scope
- MDVs.push_back(NewLoc);
- return MDNode::get(Context, MDVs.data(), MDVs.size());
+ NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD);
+
+ Value *MDVs[] = {
+ InsnMD->getOperand(0), // Line
+ InsnMD->getOperand(1), // Col
+ InsnMD->getOperand(2), // Scope
+ NewLoc
+ };
+ return MDNode::get(InsnMD->getContext(), MDVs, 4);
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -421,12 +422,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
//
BasicBlock::iterator I = NewBB->begin();
- LLVMContext &Context = OldFunc->getContext();
- unsigned DbgKind = Context.getMetadata().getMDKind("dbg");
+ unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg");
MDNode *TheCallMD = NULL;
- SmallVector<Value *, 4> MDVs;
if (TheCall && TheCall->hasMetadata())
- TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall);
+ TheCallMD = TheCall->getMetadata(DbgKind);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
@@ -436,32 +435,38 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
if (I->hasMetadata()) {
if (TheCallMD) {
- if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
- Context.getMetadata().addMD(DbgKind, NewMD, I);
+ if (MDNode *IMD = I->getMetadata(DbgKind)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
+ I->setMetadata(DbgKind, NewMD);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- Context.getMetadata().removeMD(DbgKind, I);
+ I->setMetadata(DbgKind, 0);
}
}
PHIToResolve.push_back(cast<PHINode>(OldI));
}
}
+ // FIXME:
+ // FIXME:
+ // FIXME: Unclone all this metadata stuff.
+ // FIXME:
+ // FIXME:
+
// Otherwise, remap the rest of the instructions normally.
for (; I != NewBB->end(); ++I) {
if (I->hasMetadata()) {
if (TheCallMD) {
- if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
- Context.getMetadata().addMD(DbgKind, NewMD, I);
+ if (MDNode *IMD = I->getMetadata(DbgKind)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
+ I->setMetadata(DbgKind, NewMD);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- Context.getMetadata().removeMD(DbgKind, I);
+ I->setMetadata(DbgKind, 0);
}
}
RemapInstruction(I, ValueMap);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 7a37aa3..2426e3e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,10 +20,9 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 690972d..7fcc5f7 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -109,7 +109,7 @@ X("loopsimplify", "Canonicalize natural loops", true);
const PassInfo *const llvm::LoopSimplifyID = &X;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
/// it in any convenient order) inserting preheaders...
///
bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
@@ -305,12 +305,6 @@ ReprocessLoop:
}
}
- // If there are duplicate phi nodes (for example, from loop rotation),
- // get rid of them.
- for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
- BB != E; ++BB)
- EliminateDuplicatePHINodes(*BB);
-
return Changed;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6232f32..6b2c591 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -194,7 +194,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
OrigPHINode.push_back(PN);
if (Instruction *I =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
- if (L->contains(I->getParent()))
+ if (L->contains(I))
LastValueMap[I] = I;
}
@@ -222,7 +222,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
- if (It > 1 && L->contains(InValI->getParent()))
+ if (It > 1 && L->contains(InValI))
InVal = LastValueMap[InValI];
ValueMap[OrigPHINode[i]] = InVal;
New->getInstList().erase(NewPHI);
@@ -244,7 +244,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
UI != UE;) {
Instruction *UseInst = cast<Instruction>(*UI);
++UI;
- if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
+ if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
PHINode *phi = cast<PHINode>(UseInst);
Value *Incoming = phi->getIncomingValueForBlock(*BB);
phi->addIncoming(Incoming, New);
@@ -295,7 +295,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// If this value was defined in the loop, take the value defined by the
// last iteration of the loop.
if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI->getParent()))
+ if (L->contains(InValI))
InVal = LastValueMap[InVal];
}
PN->addIncoming(InVal, LastIterationBB);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index ba41bf9..9881b3c 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -149,7 +149,29 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (SingularValue != 0)
return SingularValue;
- // Otherwise, we do need a PHI: insert one now.
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ // Scan this phi to see if it is what we need.
+ bool Equal = true;
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i)
+ if (ValueMapping[SomePHI->getIncomingBlock(i)] !=
+ SomePHI->getIncomingValue(i)) {
+ Equal = false;
+ break;
+ }
+
+ if (Equal)
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
PrototypeValue->getName(),
&BB->front());
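The PHI-reuse scan added above amounts to one map lookup per incoming edge of each candidate phi; a minimal standalone sketch of the matching logic (illustrative containers and types, not the LLVM classes):

#include <map>
#include <string>
#include <utility>
#include <vector>

// One (predecessor name, incoming value) pair per edge of a candidate phi.
typedef std::vector<std::pair<std::string, int> > PhiEdges;

// Return true if an existing phi already produces exactly the values we
// were about to create a new phi for.
static bool phiMatches(const PhiEdges &Phi,
                       const std::map<std::string, int> &Wanted) {
  for (size_t i = 0; i != Phi.size(); ++i) {
    std::map<std::string, int>::const_iterator It = Wanted.find(Phi[i].first);
    if (It == Wanted.end() || It->second != Phi[i].second)
      return false;
  }
  return true;
}

int main() {
  std::map<std::string, int> Wanted;
  Wanted["bb1"] = 7;
  Wanted["bb2"] = 9;
  PhiEdges Existing;
  Existing.push_back(std::make_pair(std::string("bb1"), 7));
  Existing.push_back(std::make_pair(std::string("bb2"), 9));
  return phiMatches(Existing, Wanted) ? 0 : 1;  // 0: the phi can be reused
}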
@@ -198,7 +220,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
// Query AvailableVals by doing an insertion of null.
std::pair<AvailableValsTy::iterator, bool> InsertRes =
- AvailableVals.insert(std::make_pair(BB, WeakVH()));
+ AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>()));
// Handle the case when the insertion fails because we have already seen BB.
if (!InsertRes.second) {
@@ -214,8 +236,8 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
// it. When we get back to the first instance of the recursion we will fill
// in the PHI node.
return InsertRes.first->second =
- PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
- &BB->front());
+ PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+ &BB->front());
}
// Okay, the value isn't in the map and we just inserted a null in the entry