summary refs log tree commit diff stats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- lib/Transforms/Scalar/CMakeLists.txt 1
-rw-r--r-- lib/Transforms/Scalar/DeadStoreElimination.cpp 25
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp 109
-rw-r--r-- lib/Transforms/Scalar/IndVarSimplify.cpp 1772
-rw-r--r-- lib/Transforms/Scalar/JumpThreading.cpp 4
-rw-r--r-- lib/Transforms/Scalar/LICM.cpp 104
-rw-r--r-- lib/Transforms/Scalar/LoopDeletion.cpp 4
-rw-r--r-- lib/Transforms/Scalar/LoopIdiomRecognize.cpp 25
-rw-r--r-- lib/Transforms/Scalar/LoopRotation.cpp 2
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp 7
-rw-r--r-- lib/Transforms/Scalar/LoopUnswitch.cpp 8
-rw-r--r-- lib/Transforms/Scalar/MemCpyOptimizer.cpp 28
-rw-r--r-- lib/Transforms/Scalar/ObjCARC.cpp 3595
-rw-r--r-- lib/Transforms/Scalar/Reassociate.cpp 2
-rw-r--r-- lib/Transforms/Scalar/Scalar.cpp 5
-rw-r--r-- lib/Transforms/Scalar/ScalarReplAggregates.cpp 240
-rw-r--r-- lib/Transforms/Scalar/SimplifyCFGPass.cpp 3
-rw-r--r-- lib/Transforms/Scalar/SimplifyLibCalls.cpp 4
18 files changed, 5020 insertions, 918 deletions
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index fcf914f..c223da6 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMScalarOpts
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
+ ObjCARC.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 53e4640..cb9b5be 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemDepResult InstDep = MD->getDependency(Inst);
- // Ignore non-local store liveness.
+ // Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() ||
- // Ignore self dependence, which happens in the entry block of the
- // function.
- InstDep.getInst() == Inst)
+ if (InstDep.isNonLocal() || InstDep.isUnknown())
continue;
// If we're storing the same value back to a pointer that we just
@@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (Loc.Ptr == 0)
continue;
- while (!InstDep.isNonLocal()) {
+ while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
MemDepResult Dep = MD->getDependency(F);
- do {
- if (Dep.isNonLocal()) return false;
-
+
+ while (!Dep.isNonLocal() && !Dep.isUnknown()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
- return false;
+ return MadeChange;
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
- return false;
+ return MadeChange;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
+ MadeChange = true;
// Inst's old Dependency is now deleted. Compute the next dependency,
// which may also be dead, as in
@@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) {
// s[1] = 0; // This has just been deleted.
// free(s);
Dep = MD->getDependency(F);
- } while (!Dep.isNonLocal());
+ };
- return true;
+ return MadeChange;
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 2515fd1..87b7317 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -91,6 +91,7 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
+ Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> {
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-
Expression ValueTable::create_expression(Instruction *I) {
Expression e;
e.type = I->getType();
@@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) {
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
- if (CmpInst *C = dyn_cast<CmpInst>(I))
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
e.opcode = (C->getOpcode() << 8) | C->getPredicate();
- else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+ assert(EI != 0 && "Not an ExtractValueInst?");
+ Expression e;
+ e.type = EI->getType();
+ e.opcode = 0;
+
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is we'll synthesize a semantically equivalent expression instead of
+ // an extract value expression.
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ e.opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ e.opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ e.opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (e.opcode != 0) {
+ // Intrinsic recognized. Grab its args to finish building the expression.
+ assert(I->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ return e;
+ }
+ }
+
+ // Not a recognised intrinsic. Fall back to producing an extract value
+ // expression.
+ e.opcode = EI->getOpcode();
+ for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+
+ return e;
+}
+
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -227,21 +275,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
// Non-local case.
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
- // FIXME: call/call dependencies for readonly calls should return def, not
- // clobber! Move the checking logic to MemDep!
+ // FIXME: Move the checking logic to MemDep!
CallInst* cdep = 0;
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
const NonLocalDepEntry *I = &deps[i];
- // Ignore non-local dependencies.
if (I->getResult().isNonLocal())
continue;
- // We don't handle non-depedencies. If we already have a call, reject
+ // We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (I->getResult().isClobber() || cdep != 0) {
+ if (!I->getResult().isDef() || cdep != 0) {
cdep = 0;
break;
}
@@ -338,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::ExtractElement:
case Instruction::InsertElement:
case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
exp = create_expression(I);
break;
+ case Instruction::ExtractValue:
+ exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ break;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
@@ -1192,8 +1240,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// escaping uses to any values that are operands to these PHIs.
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
PHINode *P = NewPHIs[i];
- for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
- AA->addEscapingUse(P->getOperandUse(2*ii));
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ AA->addEscapingUse(P->getOperandUse(jj));
+ }
}
}
@@ -1224,12 +1274,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
- Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
- dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+ dbgs() << " has unknown dependencies\n";
);
return false;
}
@@ -1245,6 +1294,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DepInfo.isUnknown()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
if (DepInfo.isClobber()) {
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
@@ -1305,6 +1359,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
+ assert(DepInfo.isDef() && "Expecting def here");
+
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
@@ -1691,10 +1747,22 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+ if (Dep.isUnknown()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
return processNonLocalLoad(L);
+ assert(Dep.isDef() && "Expecting def here");
+
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
@@ -2133,8 +2201,11 @@ bool GVN::performPRE(Function &F) {
// Because we have added a PHI-use of the pointer value, it has now
// "escaped" from alias analysis' perspective. We need to inform
// AA of this.
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
- VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+ ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+ }
if (MD)
MD->invalidateCachedPointerInfo(Phi);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8..dee3d38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,30 +52,32 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
- extern bool DisableIVRewrite;
-}
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumInserted , "Number of canonical indvars added");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+ "disable-iv-rewrite", cl::Hidden,
+ cl::desc("Disable canonical induction variable rewriting"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -84,12 +86,14 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
+
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -101,36 +105,46 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
bool isValidRewrite(Value *FromVal, Value *ToVal);
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi);
- void RewriteNonIntegerIVs(Loop *L);
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
void SinkUnusedInvariants(Loop *L);
-
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
};
}
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
- if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
return false;
-
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ IntVal = UIntVal;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has a floating-point induction variable,
+/// then insert a corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
- const Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
+ // Check IV increment. Reject this PN if increment operation is not
+ // an add or increment value can not be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
- return Trunc->getSrcTy();
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
- return Ty;
-}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
- // If the exiting block is not the same as the backedge block, we must compare
- // against the preincremented value, otherwise we prefer to compare against
- // the post-incremented value.
- Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
- if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // If this addition may overflow, we have to be more pessimistic and
- // cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
- const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and an equality or less than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
}
- // The BackedgeTaken expression contains the number of times that the
- // backedge branches to the loop header. This is one less than the
- // number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
} else {
- // We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- CmpIndVar = IndVar;
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and an equality or greater than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
}
- // Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
- "Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
- // Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
- if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
- else
- Opcode = ICmpInst::ICMP_EQ;
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
- DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar << '\n'
- << " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
- Value *OrigCond = BI->getCondition();
- // It's tempting to use replaceAllUsesWith here to fully replace the old
- // comparison, but that's not immediately safe, since users of the old
- // comparison may not be dominated by the new comparison. Instead, just
- // update the branch to use the new comparison; in the common case this
- // will make old comparison dead.
- BI->setCondition(Cond);
- DeadInsts.push_back(OrigCond);
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
- ++NumLFTR;
- Changed = true;
- return Cond;
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ if (IU)
+ IU->AddUsersIfInteresting(NewPHI);
}
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
/// RewriteLoopExitValues - Check to see if this loop has a computable
/// loop-invariant execution count. If so, this means that we can compute the
/// final value of any expressions that are recurrent in the loop, and
@@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
- // First step. Check to see if there are any floating-point recurrences.
- // If there are, change them into integer recurrences, permitting analysis by
- // the SCEV routines.
+//===----------------------------------------------------------------------===//
+// Rewrite IV users based on a canonical IV.
+// To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Visit each IVUsers entry in turn, handing integer comparisons to
+ // EliminateIVComparison and signed/unsigned remainders to
+ // EliminateIVRemainder. IU->end() is re-read on every iteration because a
+ // simplification may push more users. IV widening is not performed here.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // A commutative expr (e.g. an add) is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
//
- BasicBlock *Header = L->getHeader();
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
- SmallVector<WeakVH, 8> PHIs;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (SE->isLoopInvariant(ExitVal, L))
+ AR = ExitVal;
+ }
- // If the loop previously had floating-point IV, ScalarEvolution
- // may not have been able to compute a trip count. Now that we've done some
- // re-writing, the trip count may be computable.
- if (Changed)
- SE->forgetLoop(L);
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L, SE))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+ NewValI->setDebugLoc(User->getDebugLoc());
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
}
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
@@ -493,33 +752,30 @@ namespace {
WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
};
- typedef std::map<PHINode *, WideIVInfo> WideIVMap;
}
/// CollectExtend - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
- WideIVMap &IVMap, ScalarEvolution *SE,
- const TargetData *TD) {
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+ ScalarEvolution *SE, const TargetData *TD) {
const Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
- WideIVInfo &IVInfo = IVMap[Phi];
- if (!IVInfo.WidestNativeType) {
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
- IVInfo.IsSigned = IsSigned;
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
return;
}
// We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (IVInfo.IsSigned != IsSigned)
+ if (WI.IsSigned != IsSigned)
return;
- if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
namespace {
@@ -529,43 +785,45 @@ namespace {
/// inserting truncs whenever we stop propagating the type.
///
class WidenIV {
+ // Parameters
PHINode *OrigPhi;
const Type *WideType;
bool IsSigned;
- IVUsers *IU;
- LoopInfo *LI;
- Loop *L;
+ // Context
+ LoopInfo *LI;
+ Loop *L;
ScalarEvolution *SE;
- DominatorTree *DT;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ DominatorTree *DT;
+ // Result
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
- SmallPtrSet<Instruction*,16> Processed;
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
- LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+ WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
OrigPhi(PN),
- WideType(IVInfo.WidestNativeType),
- IsSigned(IVInfo.IsSigned),
- IU(IUsers),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- DeadInsts(DI),
WidePhi(0),
WideInc(0),
- WideIncExpr(0) {
+ WideIncExpr(0),
+ DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- bool CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
Instruction *CloneIVUser(Instruction *NarrowUse,
@@ -574,58 +832,13 @@ protected:
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
- Instruction *WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+ Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- WideIVMap IVMap;
-
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
- for(; I != E; ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (DisableIVRewrite) {
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
- continue;
- }
- }
- }
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
- continue;
- }
- }
- }
- for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
- I != E; ++I) {
- WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
- if (Widener.CreateWideIV(Rewriter))
- Changed = true;
- }
- }
-}
-
static Value *getExtend( Value *NarrowOper, const Type *WideType,
bool IsSigned, IRBuilder<> &Builder) {
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
LHS, RHS,
NarrowBO->getName());
Builder.Insert(WideBO);
- if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
return WideBO;
}
llvm_unreachable(0);
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return 0;
-
- return AddRec;
-}
-
/// HoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
@@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
return true;
}
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef) {
- // To be consistent with IVUsers, stop traversing the def-use chain at
- // inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
- return 0;
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
- // Handle data flow merges and bizarre phi cycles.
- if (!Processed.insert(NarrowUse))
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
@@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
}
else {
@@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(NarrowUse);
}
- // Now that the extend is gone, expose it's uses to IVUsers for potential
- // further simplification within SimplifyIVUsers.
- IU->AddUsersIfInteresting(WideDef, WidePhi);
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
+
+ // Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
if (!WideAddRec) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
@@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
if (!WideUse)
return 0;
}
- // GetWideRecurrence ensured that the narrow expression could be extended
- // outside the loop without overflow. This suggests that the wide use
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simple throw away the newly created wide use.
+ // where it fails, we simply throw away the newly created wide use.
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
@@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
return WideUse;
}
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Use &U = UI.getUse();
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(cast<Instruction>(U.getUser())))
+ continue;
+
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ }
+}
+
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
-bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return false;
+ return NULL;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return false;
+ return NULL;
- // An AddRec must have loop-invariant operands. Since this AddRec it
+ // An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
// operands, so they must dominate the loop header.
assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
@@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
++NumWidened;
// Traverse the def-use chain using a worklist starting at the original IV.
- assert(Processed.empty() && "expect initial state" );
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
- // Each worklist entry has a Narrow def-use link and Wide def.
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
- for (Value::use_iterator UI = OrigPhi->use_begin(),
- UE = OrigPhi->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
- }
while (!NarrowIVUsers.empty()) {
- Use *NarrowDefUse;
+ Use *UsePtr;
Instruction *WideDef;
- tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+ tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+ Use &NarrowDefUse = *UsePtr;
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
- Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
- Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+ Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
// Follow all def-use edges from the previous narrow use.
- if (WideUse) {
- for (Value::use_iterator UI = NarrowUse->use_begin(),
- UE = NarrowUse->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
- }
- }
+ if (WideUse)
+ pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
// WidenIVUse may have removed the def-use edge.
if (NarrowDef->use_empty())
DeadInsts.push_back(NarrowDef);
}
- return true;
+ return WidePhi;
}
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
@@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi) {
+ bool IsSigned) {
// We're only interested in the case where we know something about
// the numerator.
if (IVOperand != Rem->getOperand(0))
@@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
}
// Inform IVUsers about the new users.
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I, IVPhi);
-
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
DeadInsts.push_back(Rem);
}
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extension until needed, and avoid running
+ // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// replace each duplicate with the first phi recorded in ExprToIVMap.
+///
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+ if (OrigInc != IsomorphicInc &&
+ SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+ HoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElimIV;
+ Phi->replaceAllUsesWith(OrigPhi);
+ DeadInsts.push_back(Phi);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Decide whether the backedge-taken count of
+/// this loop can be safely and cheaply turned into an instruction sequence
+/// for LinearFunctionTestReplace.
+///
+/// Returns false when the count is not computable or is zero, when
+/// L->getExitingBlock() is null, or when that block does not end in a branch.
+/// A UDiv count is accepted only if it equals one of the exit compare's
+/// operands minus one.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECount) || BECount->isZero())
+    return false;
+
+  BasicBlock *ExitingBB = L->getExitingBlock();
+  if (!ExitingBB)
+    return false;
+
+  // Only branch terminators can be rewritten so far.
+  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+  if (!BI)
+    return false;
+
+  // Anything other than a UDiv needs no further scrutiny.
+  if (!isa<SCEVUDivExpr>(BECount))
+    return true;
+
+  // A UDiv count was most likely synthesized by ScalarEvolution to obtain a
+  // precise expression rather than written by the user. Unless a simple
+  // search ties it to an operand of the exit compare, assume the former and
+  // leave the loop alone.
+  ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!OrigCond)
+    return false;
+
+  const SCEV *RHSLessOne = SE->getSCEV(OrigCond->getOperand(1));
+  RHSLessOne =
+    SE->getMinusSCEV(RHSLessOne, SE->getConstant(RHSLessOne->getType(), 1));
+  if (RHSLessOne == BECount)
+    return true;
+
+  const SCEV *LHSLessOne = SE->getSCEV(OrigCond->getOperand(0));
+  LHSLessOne =
+    SE->getMinusSCEV(LHSLessOne, SE->getConstant(LHSLessOne->getType(), 1));
+  return LHSLessOne == BECount;
+}
+
+/// getBackedgeIVType - Return the source type of the first TruncInst operand
+/// of the loop's exit compare, i.e. the type the loop test uses once the
+/// trunc is peeked through.
+///
+/// Returns null when L->getExitingBlock() is null, when that block's
+/// terminator is not a branch, when the branch condition is not an icmp, or
+/// when no operand of the icmp is a TruncInst.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+  if (!L->getExitingBlock())
+    return 0;
+
+  // Can't rewrite non-branch yet.
+  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  if (!BI)
+    return 0;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return 0;
+
+  // Operands are visited in order; the first trunc found decides the result.
+  for (User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+       OI != OE; ++OI)
+    if (TruncInst *Trunc = dyn_cast<TruncInst>(*OI))
+      return Trunc->getSrcTy();
+
+  // No operand is a trunc, so there is no wider type to report.
+  return 0;
+}
+
+/// LinearFunctionTestReplace - Rewrite the exit condition of the loop as a
+/// canonical equality comparison between the induction variable and the
+/// expanded iteration count.
+///
+/// The exiting branch is switched to a new "exitcond" icmp: ICMP_NE when
+/// successor 0 of the branch is inside the loop, ICMP_EQ otherwise. Any loop
+/// for which SCEV can determine a loop-invariant trip count qualifies, which
+/// is a much broader set than loops with linear tests.
+///
+/// The caller must have established canExpandBackedgeTakenCount(L, SE). The
+/// new comparison is returned; the previous condition is queued on DeadInsts.
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+                          const SCEV *BackedgeTakenCount,
+                          PHINode *IndVar,
+                          SCEVExpander &Rewriter) {
+  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+  BasicBlock *ExitingBB = L->getExitingBlock();
+  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+
+  // When the exiting block is not the latch, the comparison has to use the
+  // pre-incremented value; otherwise the post-incremented value is preferred.
+  Value *CmpIndVar = IndVar;
+  const SCEV *RHS;
+  if (ExitingBB != L->getLoopLatch()) {
+    // Pre-incremented IV against the backedge-taken count itself.
+    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                      IndVar->getType());
+  } else {
+    // The trip count is the backedge-taken count plus one. If that addition
+    // may overflow, be pessimistic and cast before adding.
+    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+    const SCEV *One = SE->getConstant(BackedgeTakenCount->getType(), 1);
+    const SCEV *N = SE->getAddExpr(BackedgeTakenCount, One);
+    bool NoOverflow =
+      (isa<SCEVConstant>(N) && !N->isZero()) ||
+      SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero);
+    if (NoOverflow) {
+      // Safe to cast the sum.
+      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Cast first, then add in the induction variable's type.
+      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                        IndVar->getType());
+      RHS = SE->getAddExpr(RHS,
+                           SE->getConstant(IndVar->getType(), 1));
+    }
+
+    // The backedge-taken count is one less than the number of times the loop
+    // body runs, so compare against the incremented indvar.
+    CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
+  }
+
+  // Materialize the iteration count right before the branch.
+  assert(SE->isLoopInvariant(RHS, L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+  // Stay in the loop on "not equal" when the true edge is the in-loop edge.
+  ICmpInst::Predicate Opcode = L->contains(BI->getSuccessor(0))
+                                 ? ICmpInst::ICMP_NE
+                                 : ICmpInst::ICMP_EQ;
+
+  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+               << " LHS:" << *CmpIndVar << '\n'
+               << " op:\t"
+               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << " RHS:\t" << *RHS << "\n");
+
+  Value *OrigCond = BI->getCondition();
+  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+  Cond->setDebugLoc(BI->getDebugLoc());
+
+  // Only the branch is retargeted. A full replaceAllUsesWith on the old
+  // comparison is not immediately safe, because its other users may not be
+  // dominated by the new comparison. In the common case the old comparison
+  // simply becomes dead.
+  BI->setCondition(Cond);
+  DeadInsts.push_back(OrigCond);
+
+  Changed = true;
+  ++NumLFTR;
+  return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// SinkUnusedInvariants - If L has both an exit block (getExitBlock) and a
+/// preheader, move preheader instructions with no use in the preheader or the
+/// loop into the exit block to reduce register pressure; otherwise do nothing.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator(); // walk upward from the terminator
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader; stop at a PHI.
+ if (isa<PHINode>(I))
+ break;
+
+ // Leave instructions that might have side effects where they are: those
+ // effects need to complete before instructions inside the loop. Likewise
+ // leave instructions that might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Debug info intrinsics are never sunk.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Look for a use in the preheader or in the loop. A PHI operand counts as a
+ // use in the matching incoming block rather than in the PHI's own block.
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is such a use, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block, stepping I back before the move.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics. NOTE(review): the --I at the loop top then steps again, so the instruction reached here looks unexamined; confirm.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove; // later (earlier-defined) sinks land before this one, keeping their order
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- IU = &getAnalysis<IVUsers>();
+ if (!DisableIVRewrite)
+ IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE);
- if (DisableIVRewrite)
+ SCEVExpander Rewriter(*SE, "indvars");
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ if (DisableIVRewrite) {
Rewriter.disableCanonicalMode();
+ SimplifyIVUsersNoRewrite(L, Rewriter);
+ }
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- SimplifyIVUsers(Rewriter);
+ if (!DisableIVRewrite)
+ SimplifyIVUsers(Rewriter);
+
+ // Eliminate redundant IV cycles.
+ if (DisableIVRewrite)
+ SimplifyCongruentIVs(L);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
@@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
"canonical IV disrupted BackedgeTaken expansion");
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
- NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- Rewriter);
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ NewICmp =
+ LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
if (!DisableIVRewrite)
@@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
- IndVar);
+ if (NewICmp && IU)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
return Changed;
}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return;
-
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
- BasicBlock::iterator I = Preheader->getTerminator();
- while (I != Preheader->begin()) {
- --I;
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I->mayHaveSideEffects() || I->mayReadFromMemory())
- continue;
-
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Don't sink static AllocaInsts out of the entry block, which would
- // turn them into dynamic allocas!
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (AI->isStaticAlloca())
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- BasicBlock *UseBB = cast<Instruction>(U)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(U)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
- bool Done = false;
-
- if (I != Preheader->begin()) {
- // Skip debug info intrinsics.
- do {
- --I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
-
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
- Done = true;
- } else {
- Done = true;
- }
-
- ToMove->moveBefore(InsertPt);
- if (Done) break;
- InsertPt = ToMove;
- }
-}
-
-/// ConvertToSInt - Convert APF to an integer, if possible.
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
- bool isExact = false;
- if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
- // See if we can convert this to an int64_t
- uint64_t UIntVal;
- if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
- &isExact) != APFloat::opOK || !isExact)
- return false;
- IntVal = UIntVal;
- return true;
-}
-
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-/// bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-/// bar((double)i);
-///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
- unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = IncomingEdge^1;
-
- // Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
- int64_t InitValue;
- if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
- return;
-
- // Check IV increment. Reject this PN if increment operation is not
- // an add or increment value can not be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
- // If this is not an add of the PHI with a constantfp, or if the constant fp
- // is not an integer, bail out.
- ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
- int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
- !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return;
-
- // Check Incr uses. One user is PN and the other user is an exit condition
- // used by the conditional terminator.
- Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->use_end()) return;
-
- // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
- // only used by a branch, we can't transform it.
- FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
- if (!Compare)
- Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->use_back()))
- return;
-
- BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
-
- // We need to verify that the branch actually controls the iteration count
- // of the loop. If not, the new IV can overflow and no one will notice.
- // The branch block must be in the loop and one of the successors must be out
- // of the loop.
- assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
- if (!L->contains(TheBr->getParent()) ||
- (L->contains(TheBr->getSuccessor(0)) &&
- L->contains(TheBr->getSuccessor(1))))
- return;
-
-
- // If it isn't a comparison with an integer-as-fp (the exit value), we can't
- // transform it.
- ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
- int64_t ExitValue;
- if (ExitValueVal == 0 ||
- !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
- return;
-
- // Find new predicate for integer comparison.
- CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
- switch (Compare->getPredicate()) {
- default: return; // Unknown comparison.
- case CmpInst::FCMP_OEQ:
- case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
- case CmpInst::FCMP_OLT:
- case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
- }
-
- // We convert the floating point induction variable to a signed i32 value if
- // we can. This is only safe if the comparison will not overflow in a way
- // that won't be trapped by the integer equivalent operations. Check for this
- // now.
- // TODO: We could use i64 if it is native and the range requires it.
-
- // The start/stride/exit values must all fit in signed i32.
- if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return;
-
- // If not actually striding (add x, 0.0), avoid touching the code.
- if (IncValue == 0)
- return;
-
- // Positive and negative strides have different safety conditions.
- if (IncValue > 0) {
- // If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
- return;
-
- uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return;
-
- } else {
- // If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
- return;
-
- uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(-IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return;
- }
-
- const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
-
- // Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
- NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
- PN->getIncomingBlock(IncomingEdge));
-
- Value *NewAdd =
- BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
- Incr->getName()+".int", Incr);
- NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
-
- ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
- ConstantInt::get(Int32Ty, ExitValue),
- Compare->getName());
-
- // In the following deletions, PN may become dead and may be deleted.
- // Use a WeakVH to observe whether this happens.
- WeakVH WeakPH = PN;
-
- // Delete the old floating point exit comparison. The branch starts using the
- // new comparison.
- NewCompare->takeName(Compare);
- Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
-
- // Delete the old floating point increment.
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
-
- // If the FP induction variable still has uses, this is because something else
- // in the loop uses its value. In order to canonicalize the induction
- // variable, we chose to eliminate the IV and rewrite it in terms of an
- // int->fp cast.
- //
- // We give preference to sitofp over uitofp because it is faster on most
- // platforms.
- if (WeakPH) {
- Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
- PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
- }
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- IU->AddUsersIfInteresting(NewPHI, NewPHI);
-}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index cf18ff0..b500d5b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
TestBB = BBTerm->getSuccessor(i);
unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- if (NumPreds < MinNumPreds)
+ if (NumPreds < MinNumPreds) {
MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
}
return MinSucc;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 13bd022..66add6c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
/// Hoist expressions out of the specified loop. Note, alias info for inner
-/// loop is not preserved so it is not a good idea to run LICM multiple
+/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
///
bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// What if InnerLoop was modified by other passes ?
CurAST->add(*InnerAST);
-
+
// Once we've incorporated the inner loop's AST into ours, we don't need the
// subloop's anymore.
delete InnerAST;
LoopToAliasSetMap.erase(InnerL);
}
-
+
CurLoop = L;
// Get the preheader block to move instructions into...
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
I != E; ++I)
PromoteAliasSet(*I);
}
-
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
@@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
-
+
// If the instruction is dead, we would try to sink it because it isn't used
// in the loop, instead, just delete it.
if (isInstructionTriviallyDead(&I)) {
@@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
I.eraseFromParent();
continue;
}
-
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
-
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
@@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) {
}
return;
}
-
+
if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
@@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) {
I.eraseFromParent();
return;
}
-
+
// Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
// hard work of inserting PHI nodes as necessary.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSA(&NewPHIs);
-
+
if (!I.use_empty())
SSA.Initialize(I.getType(), I.getName());
-
+
// Insert a copy of the instruction in each exit block of the loop that is
// dominated by the instruction. Each exit block is known to only be in the
// ExitBlocks list once.
BasicBlock *InstOrigBB = I.getParent();
unsigned NumInserted = 0;
-
+
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
-
+
if (!DT->dominates(InstOrigBB, ExitBlock))
continue;
-
+
// Insert the code after the last PHI node.
BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
+
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
// the copy.
@@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) {
New->setName(I.getName()+".le");
ExitBlock->getInstList().insert(InsertPt, New);
}
-
+
// Now that we have inserted the instruction, inform SSAUpdater.
if (!I.use_empty())
SSA.AddAvailableValue(ExitBlock, New);
}
-
+
// If the instruction doesn't dominate any exit blocks, it must be dead.
if (NumInserted == 0) {
CurAST->deleteValue(&I);
@@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) {
I.eraseFromParent();
return;
}
-
+
// Next, rewrite uses of the instruction, inserting PHI nodes as needed.
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
@@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) {
++UI;
SSA.RewriteUseAfterInsertions(U);
}
-
+
// Update CurAST for NewPHIs if I had pointer type.
if (I.getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
CurAST->copyValue(&I, NewPHIs[i]);
-
+
// Finally, remove the instruction from CurAST. It is no longer in the loop.
CurAST->deleteValue(&I);
}
@@ -606,15 +606,17 @@ namespace {
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
AliasSetTracker &AST;
DebugLoc DL;
+ int Alignment;
public:
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl)
- : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
-
+ DebugLoc dl, int alignment)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+ Alignment(alignment) {}
+
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
Value *Ptr;
@@ -624,7 +626,7 @@ namespace {
Ptr = cast<StoreInst>(I)->getPointerOperand();
return PointerMustAliases.count(Ptr);
}
-
+
virtual void doExtraRewritesBeforeFinalDeletion() const {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
@@ -635,6 +637,7 @@ namespace {
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Instruction *InsertPos = ExitBlock->getFirstNonPHI();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
}
}
@@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
return;
-
+
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
Value *SomePtr = AS.begin()->getValue();
@@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
//
// is not safe, because *P may only be valid to access if 'c' is true.
- //
+ //
// It is safe to promote P if all uses are direct load/stores and if at
// least one is guaranteed to be executed.
bool GuaranteedToExecute = false;
-
+
SmallVector<Instruction*, 64> LoopUses;
SmallPtrSet<Value*, 4> PointerMustAliases;
+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
Value *ASIV = ASI->getValue();
PointerMustAliases.insert(ASIV);
-
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
return;
-
+
for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
// Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
+
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- if (isa<LoadInst>(Use))
+ unsigned InstAlignment;
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- else if (isa<StoreInst>(Use)) {
+ InstAlignment = load->getAlignment();
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (Use->getOperand(1) != ASIV)
continue;
+ InstAlignment = store->getAlignment();
assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
} else
return; // Not a load or store.
-
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isSafeToExecuteUnconditionally(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-
+
LoopUses.push_back(Use);
}
}
-
+
// If there isn't a guaranteed-to-execute instruction, we can't promote.
if (!GuaranteedToExecute)
return;
-
+
// Otherwise, this is safe to promote, lets do it!
- DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
Changed = true;
++NumPromoted;
@@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
-
+
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL);
-
+ *CurAST, DL, Alignment);
+
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad =
new LoadInst(SomePtr, SomePtr->getName()+".promoted",
Preheader->getTerminator());
+ PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 753a558..f7f3298 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
BasicBlock* exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
- P->replaceUsesOfWith(exitingBlock, preheader);
+ int j = P->getBasicBlockIndex(exitingBlock);
+ assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+ P->setIncomingBlock(j, preheader);
for (unsigned i = 1; i < exitingBlocks.size(); ++i)
P->removeIncomingValue(exitingBlocks[i]);
++BI;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dbf6eec..a0e41d9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -167,12 +167,17 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (isInstructionTriviallyDead(I))
- deleteDeadInstruction(I, SE);
+ deleteDeadInstruction(I, SE);
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
// The trip count of the loop must be analyzable.
SE = &getAnalysis<ScalarEvolution>();
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
@@ -467,8 +472,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE);
-
+ SCEVExpander Expander(*SE, "loop-idiom");
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -488,7 +493,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
deleteIfDeadInstruction(BasePtr, *SE);
return false;
}
-
+
// Okay, everything looks good, insert the memset.
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -556,8 +561,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE);
-
+ SCEVExpander Expander(*SE, "loop-idiom");
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -568,7 +573,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
Expander.expandCodeFor(StoreEv->getStart(),
Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
Preheader->getTerminator());
-
+
if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
CurLoop, BECount, StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
@@ -593,9 +598,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
deleteIfDeadInstruction(StoreBasePtr, *SE);
return false;
}
-
+
// Okay, everything is safe, we can transform this!
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
@@ -619,7 +624,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 47dced3..9fd0958 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73ebd61..509d026 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(),
- CondUse->getPhi());
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
@@ -2768,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->getValue().isNegative() !=
+ if (C->getValue()->isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
.ule(abs64(NewF.AM.BaseOffs)))
@@ -3699,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE);
+ SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index e05f29c..840c4b6 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
Succ->end());
@@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
- // If Succ has any successors with PHI nodes, update them to have
- // entries coming from Pred instead of Succ.
- Succ->replaceAllUsesWith(Pred);
-
// Remove Succ from the loop tree.
LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be5aa2e..7ed3db6 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse()) {
+ if (!LI->isVolatile() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- MemDepResult sdep = MD->getDependency(SI);
- if (!sdep.isNonLocal()) {
- bool FoundCall = false;
- for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
- if (&*I == C) {
- FoundCall = true;
- break;
- }
- }
- if (!FoundCall)
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
C = 0;
+ break;
+ }
}
}
@@ -842,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys, 3));
+ ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 0000000..ee132d3
--- /dev/null
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc. Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MapVector - An associative container with fast insertion-order
+ /// (deterministic) iteration over its elements. Plus the special
+ /// blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map - Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ /// Vector - Keys and values.
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](KeyT Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector.back().second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(InsertPair);
+ return std::make_pair(llvm::prior(Vector.end()), true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(KeyT Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// blot - This is similar to erase, but instead of removing the element
+ /// from the vector, it just zeros out the key in the vector. This leaves
+ /// iterators intact, but clients must be prepared for zeroed-out keys when
+ /// iterating.
+ void blot(KeyT Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// ARC Utilities.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// InstructionClass - A simple classification for instructions.
+ enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+ // Pointers to static or stack storage are not reference-counted pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments are not reference-counted.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types, and not function pointers.
+ const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ return false;
+ // Conservatively assume anything else is a potential use.
+ return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialUse(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ const Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ const Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// GetInstructionClass - Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case 0: break;
+ case Intrinsic::bswap: case Intrinsic::ctpop:
+ case Intrinsic::ctlz: case Intrinsic::cttz:
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ for (Function::const_arg_iterator AI = F->arg_begin(),
+ AE = F->arg_end(); AI != AE; ++AI)
+ if (IsPotentialUse(AI))
+ return IC_User;
+ return IC_None;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialUse(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialUse(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return IC_User;
+}
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Remove the given call from its parent. If the call's
+/// result still has users, they are redirected to the call's first argument
+/// (the call must then be a forwarding call, which returns that argument
+/// verbatim). If the result had no users, the argument itself is cleaned up
+/// when it has become trivially dead.
+static void EraseInstruction(Instruction *CI) {
+  // Grab the argument before the call is destroyed.
+  Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+  const bool HadUsers = !CI->use_empty();
+
+  if (HadUsers) {
+    // Forward the argument to everyone who consumed the return value.
+    assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+           "Can't delete non-forwarding instruction with users!");
+    CI->replaceAllUsesWith(OldArg);
+  }
+
+  CI->eraseFromParent();
+
+  // Only when nothing was rewired to OldArg can it have become dead.
+  if (!HadUsers)
+    RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// GetUnderlyingObjCPtr - A wrapper around GetUnderlyingObject which
+/// additionally steps through forwarding calls (see IsForwarding), which we
+/// know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+  V = GetUnderlyingObject(V);
+  // Each forwarding call is replaced by the underlying object of its
+  // argument until a non-forwarding value is reached.
+  while (IsForwarding(GetBasicInstructionClass(V)))
+    V = GetUnderlyingObject(cast<CallInst>(V)->getArgOperand(0));
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - A wrapper around
+/// Value::stripPointerCasts which additionally steps through forwarding
+/// calls (see IsForwarding), which we know to return their argument
+/// verbatim. This is the const overload.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+  V = V->stripPointerCasts();
+  while (IsForwarding(GetBasicInstructionClass(V)))
+    V = cast<CallInst>(V)->getArgOperand(0)->stripPointerCasts();
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - A wrapper around
+/// Value::stripPointerCasts which additionally steps through forwarding
+/// calls (see IsForwarding), which we know to return their argument
+/// verbatim. This is the non-const overload.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+  V = V->stripPointerCasts();
+  while (IsForwarding(GetBasicInstructionClass(V))) {
+    Value *Forwarded = cast<CallInst>(V)->getArgOperand(0);
+    V = Forwarded->stripPointerCasts();
+  }
+  return V;
+}
+
+/// GetObjCArg - Given one of the special calls such as objc_retain or
+/// objc_release, return its first argument with no-op casts and forwarding
+/// calls stripped away. Inst must be a CallInst.
+static Value *GetObjCArg(Value *Inst) {
+  Value *RawArg = cast<CallInst>(Inst)->getArgOperand(0);
+  return StripPointerCastsAndObjCCalls(RawArg);
+}
+
+/// IsObjCIdentifiedObject - Decide whether V is an "identified" value for
+/// the purposes of ObjC provenance reasoning: a call or invoke result, an
+/// argument, a constant, an alloca, or a load from one of a handful of
+/// specially-named ObjC metadata globals.
+static bool IsObjCIdentifiedObject(const Value *V) {
+  // Assume that call results and arguments have their own "provenance".
+  // Constants (including GlobalVariables) and Allocas are never
+  // reference-counted.
+  if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+      isa<Constant>(V) || isa<AllocaInst>(V))
+    return true;
+
+  // Beyond that, only loads can qualify.
+  const LoadInst *LI = dyn_cast<LoadInst>(V);
+  if (!LI)
+    return false;
+
+  // ...and only loads whose address is (after stripping) a global variable.
+  const Value *Pointer =
+    StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer);
+  if (!GV)
+    return false;
+
+  // These special variables are known to hold values which are not
+  // reference-counted pointers.
+  StringRef Name = GV->getName();
+  return Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+         Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+         Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+         Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+         Name.startswith("\01l_objc_msgSend_fixup_");
+}
+
+/// FindSingleUseIdentifiedObject - Walk up from Arg through bitcasts,
+/// all-zero-index GEPs and forwarding calls, much like
+/// StripPointerCastsAndObjCCalls, but give up as soon as a value with
+/// multiple (non-trivial) uses is encountered. Returns the identified object
+/// that was found, or null if there is none.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+  if (!Arg->hasOneUse()) {
+    // A value without exactly one use is only acceptable if it is itself an
+    // identified object and every one of its users is a trivial use: the
+    // user has no users of its own and strips straight back to Arg.
+    if (!IsObjCIdentifiedObject(Arg))
+      return 0;
+
+    for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+         UI != UE; ++UI) {
+      const User *U = *UI;
+      if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+        return 0;
+    }
+
+    return Arg;
+  }
+
+  // Exactly one use: keep climbing through pass-through instructions.
+  if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+    return FindSingleUseIdentifiedObject(BC->getOperand(0));
+
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+    if (GEP->hasAllZeroIndices())
+      return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+
+  if (IsForwarding(GetBasicInstructionClass(Arg)))
+    return FindSingleUseIdentifiedObject(
+             cast<CallInst>(Arg)->getArgOperand(0));
+
+  // Nothing left to look through; Arg is either the answer or a dead end.
+  return IsObjCIdentifiedObject(Arg) ? Arg : 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on, i.e. whether it contains a named value for any of the
+/// ObjC runtime entry points listed below.
+static bool ModuleHasARC(const Module &M) {
+  static const char *const RuntimeNames[] = {
+    "objc_retain",
+    "objc_release",
+    "objc_autorelease",
+    "objc_retainAutoreleasedReturnValue",
+    "objc_retainBlock",
+    "objc_autoreleaseReturnValue",
+    "objc_autoreleasePoolPush",
+    "objc_loadWeakRetained",
+    "objc_loadWeak",
+    "objc_destroyWeak",
+    "objc_storeWeak",
+    "objc_initWeak",
+    "objc_moveWeak",
+    "objc_copyWeak",
+    "objc_retainedObject",
+    "objc_unretainedObject",
+    "objc_unretainedPointer"
+  };
+
+  // Stop at the first name that is present in the module.
+  for (unsigned i = 0, e = sizeof(RuntimeNames) / sizeof(RuntimeNames[0]);
+       i != e; ++i)
+    if (M.getNamedValue(RuntimeNames[i]))
+      return true;
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+  /// ObjCARCAliasAnalysis - A simple alias analysis implementation that
+  /// answers queries using knowledge of ARC constructs, chaining to the
+  /// next AliasAnalysis for everything else.
+  ///
+  /// TODO: This class could be generalized to know about other ObjC-specific
+  /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
+  /// even though their offsets are dynamic.
+  class ObjCARCAliasAnalysis : public ImmutablePass,
+                               public AliasAnalysis {
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+
+    ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+      initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+  private:
+    /// initializePass - Hook this pass into the AliasAnalysis machinery.
+    virtual void initializePass() { InitializeAliasAnalysis(this); }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass
+    /// implements an analysis interface through multiple inheritance. When
+    /// asked for the AliasAnalysis interface, return the this pointer
+    /// adjusted to the AliasAnalysis base; otherwise return it unadjusted.
+    virtual void *getAdjustedAnalysisPointer(const void *PI) {
+      if (PI != &AliasAnalysis::ID)
+        return this;
+      return static_cast<AliasAnalysis *>(this);
+    }
+
+    // AliasAnalysis / Pass interface overrides; see the out-of-line
+    // definitions for details.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+    virtual ModRefBehavior getModRefBehavior(const Function *F);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Location &Loc);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2);
+  };
+} // End of anonymous namespace
+
+// Register this pass as part of the AliasAnalysis group, using the pass
+// argument "objc-arc-aa". ID is the pass's identification object; the pass
+// framework identifies a pass by this object's address (see the comparison
+// against &AliasAnalysis::ID in getAdjustedAnalysisPointer).
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+                   "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+/// createObjCARCAliasAnalysisPass - Allocate a new ObjCARCAliasAnalysis
+/// pass and return it.
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+  ImmutablePass *NewPass = new ObjCARCAliasAnalysis();
+  return NewPass;
+}
+
+/// getAnalysisUsage - This pass preserves all analyses; the remaining
+/// requirements are those of the AliasAnalysis base.
+void ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  // Declare preservation first, then let the base class add its own usage.
+  AU.setPreservesAll();
+  AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// alias - Answer an alias query in two stages: a precise query on the
+/// locations with ObjC no-ops stripped, then, if that is inconclusive, an
+/// imprecise query on the underlying objects from which only a NoAlias
+/// answer is trusted.
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::alias(LocA, LocB);
+
+  // Stage one: strip off no-ops, including ObjC-specific no-ops, keeping the
+  // original size and TBAA information, and chain a precise query.
+  const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+  const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+  const Location StrippedA(SA, LocA.Size, LocA.TBAATag);
+  const Location StrippedB(SB, LocB.Size, LocB.TBAATag);
+  AliasResult Precise = AliasAnalysis::alias(StrippedA, StrippedB);
+  if (Precise != MayAlias)
+    return Precise;
+
+  // Stage two: climb to the underlying objects, again looking through
+  // ObjC-specific no-ops. If that changes nothing there is nothing new to
+  // ask; chaining was already covered by the precise query above.
+  const Value *UA = GetUnderlyingObjCPtr(SA);
+  const Value *UB = GetUnderlyingObjCPtr(SB);
+  if (UA == SA && UB == SB)
+    return MayAlias;
+
+  // We can't use MustAlias or PartialAlias results here because
+  // GetUnderlyingObjCPtr may return an offsetted pointer value.
+  AliasResult Imprecise = AliasAnalysis::alias(Location(UA), Location(UB));
+  return Imprecise == NoAlias ? NoAlias : MayAlias;
+}
+
+/// pointsToConstantMemory - Answer a constant-memory query in two stages: a
+/// precise query on the location with ObjC no-ops stripped, then, if that
+/// fails, an imprecise query on the underlying object.
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                             bool OrLocal) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+  // Stage one: strip off no-ops, including ObjC-specific no-ops, keeping the
+  // original size and TBAA information, and chain a precise query.
+  const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+  const Location Stripped(S, Loc.Size, Loc.TBAATag);
+  if (AliasAnalysis::pointsToConstantMemory(Stripped, OrLocal))
+    return true;
+
+  // Stage two: climb to the underlying object, again looking through
+  // ObjC-specific no-ops. If that changes nothing, fail; chaining was
+  // already covered by the precise query above.
+  const Value *U = GetUnderlyingObjCPtr(S);
+  if (U == S)
+    return false;
+
+  return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+}
+
+/// getModRefBehavior - Call-site flavor. No ARC-specific knowledge applies
+/// here, so the query is simply chained to the next AliasAnalysis.
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+  ModRefBehavior Chained = AliasAnalysis::getModRefBehavior(CS);
+  return Chained;
+}
+
+/// getModRefBehavior - Function flavor. When ARC optimizations are enabled,
+/// functions classified as IC_NoopCast are reported as not accessing
+/// memory; every other query is chained to the next AliasAnalysis.
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+  if (EnableARCOpts && GetFunctionClass(F) == IC_NoopCast)
+    return DoesNotAccessMemory;
+
+  return AliasAnalysis::getModRefBehavior(F);
+}
+
+/// getModRefInfo - Call-site versus location flavor. When ARC optimizations
+/// are enabled, the listed ObjC runtime call classes are reported as
+/// NoModRef; every other query is chained to the next AliasAnalysis.
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::getModRefInfo(CS, Loc);
+
+  switch (GetBasicInstructionClass(CS.getInstruction())) {
+  default:
+    // Not something we have special knowledge about; chain.
+    return AliasAnalysis::getModRefInfo(CS, Loc);
+  case IC_Retain:
+  case IC_RetainRV:
+  case IC_RetainBlock:
+  case IC_Autorelease:
+  case IC_AutoreleaseRV:
+  case IC_NoopCast:
+  case IC_AutoreleasepoolPush:
+  case IC_FusedRetainAutorelease:
+  case IC_FusedRetainAutoreleaseRV:
+    // These functions don't access any memory visible to the compiler.
+    return NoModRef;
+  }
+}
+
+/// getModRefInfo - Call-site versus call-site flavor. Currently this just
+/// chains to the next AliasAnalysis.
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+                                    ImmutableCallSite CS2) {
+  // TODO: Theoretically we could check for dependencies between objc_* calls
+  // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+  ModRefResult Chained = AliasAnalysis::getModRefInfo(CS1, CS2);
+  return Chained;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC expansion.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Scalar.h"
+
+namespace {
+  /// ObjCARCExpand - Early ARC transformations. This function pass rewrites
+  /// uses of the return values of certain ObjC runtime calls to use the
+  /// call's argument instead (see runOnFunction).
+  class ObjCARCExpand : public FunctionPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    /// It is not set by the constructor; doInitialization assigns it from
+    /// ModuleHasARC.
+    bool Run;
+
+  public:
+    static char ID;
+    ObjCARCExpand() : FunctionPass(ID) {
+      initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+// Define the pass identification object and register the pass using the
+// pass argument "objc-arc-expand".
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+                "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+/// createObjCARCExpandPass - Allocate a new ObjCARCExpand pass and return
+/// it.
+Pass *llvm::createObjCARCExpandPass() {
+  Pass *NewPass = new ObjCARCExpand();
+  return NewPass;
+}
+
+/// getAnalysisUsage - Declare that this pass preserves the CFG; it only
+/// rewrites uses of values (see runOnFunction).
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+/// doInitialization - Record whether the module contains any of the ObjC
+/// runtime names checked by ModuleHasARC, so runOnFunction can bail out
+/// early otherwise. Always returns false: the module is not modified here.
+bool ObjCARCExpand::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  return false;
+}
+
+/// runOnFunction - For every retain/autorelease-style call in F, replace all
+/// uses of the call's result with the call's first argument. Returns true
+/// if any such call was encountered.
+bool ObjCARCExpand::runOnFunction(Function &F) {
+  // Do nothing if ARC optimization is disabled or if nothing in the Module
+  // uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  bool Changed = false;
+
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    // Skip everything except the calls which return their argument.
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_Retain:
+    case IC_RetainRV:
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      break;
+    default:
+      continue;
+    }
+
+    // These calls return their argument verbatim, as a low-level
+    // optimization. However, this makes high-level optimizations
+    // harder. Undo any uses of this optimization that the front-end
+    // emitted here. We'll redo them in a later pass.
+    Changed = true;
+    Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+  }
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be Interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+// Statistics counters for the ARC optimizer. Each STATISTIC declares a named
+// counter together with the description string reported for it.
+STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
+STATISTIC(NumRets,        "Number of return value forwarding "
+                          "retain+autoreleaes eliminated");
+STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
+
+namespace {
+  /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+  /// uses many of the same techniques, except it uses special ObjC-specific
+  /// reasoning about pointer relationships.
+  ///
+  /// Results of related() are memoized per (A, B) pair; call clear() to
+  /// drop the cache.
+  class ProvenanceAnalysis {
+    /// AA - The alias analysis consulted by relatedCheck. Null until setAA
+    /// has been called.
+    AliasAnalysis *AA;
+
+    typedef std::pair<const Value *, const Value *> ValuePairTy;
+    typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+
+    /// CachedResults - Memoized answers for related(), keyed by the
+    /// pointer-ordered pair of values.
+    CachedResultsTy CachedResults;
+
+    bool relatedCheck(const Value *A, const Value *B);
+    bool relatedSelect(const SelectInst *A, const Value *B);
+    bool relatedPHI(const PHINode *A, const Value *B);
+
+    // Do not implement.
+    void operator=(const ProvenanceAnalysis &);
+    ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+  public:
+    // Start with a null AliasAnalysis rather than an indeterminate pointer,
+    // so that getAA() before setAA() yields a well-defined value.
+    ProvenanceAnalysis() : AA(0) {}
+
+    /// setAA - Install the alias analysis to use for subsequent queries.
+    void setAA(AliasAnalysis *aa) { AA = aa; }
+
+    /// getAA - Return the currently installed alias analysis (null if none
+    /// has been set).
+    AliasAnalysis *getAA() const { return AA; }
+
+    /// related - Return false only if A and B are known not to be related;
+    /// true is the conservative answer.
+    bool related(const Value *A, const Value *B);
+
+    /// clear - Forget all memoized query results.
+    void clear() {
+      CachedResults.clear();
+    }
+  };
+}
+
+/// relatedSelect - Provenance check where A is a Select. Returns true if
+/// any arm of A may be related to B.
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+  // If B is also a Select on the same condition, we can do a more precise
+  // check: only corresponding arms need to be compared with each other.
+  if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+    if (A->getCondition() == SB->getCondition())
+      return related(A->getTrueValue(), SB->getTrueValue()) ||
+             related(A->getFalseValue(), SB->getFalseValue());
+
+  // Otherwise check each arm of A against B as a whole. If neither arm is
+  // related, the Select isn't either.
+  return related(A->getTrueValue(), B) ||
+         related(A->getFalseValue(), B);
+}
+
+/// relatedPHI - Provenance check where A is a PHI. Returns true if any
+/// incoming value of A may be related to B.
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+  const unsigned NumIncoming = A->getNumIncomingValues();
+
+  // If B is a PHI in the same block, we can do a more precise as well as
+  // more efficient check: compare only the values arriving along the same
+  // edge.
+  if (const PHINode *PNB = dyn_cast<PHINode>(B))
+    if (PNB->getParent() == A->getParent()) {
+      for (unsigned i = 0; i != NumIncoming; ++i) {
+        const Value *FromA = A->getIncomingValue(i);
+        const Value *FromB =
+          PNB->getIncomingValueForBlock(A->getIncomingBlock(i));
+        if (related(FromA, FromB))
+          return true;
+      }
+      return false;
+    }
+
+  // Otherwise check each distinct incoming value of A against B, skipping
+  // values that have already been examined.
+  SmallPtrSet<const Value *, 4> UniqueSrc;
+  for (unsigned i = 0; i != NumIncoming; ++i) {
+    const Value *Incoming = A->getIncomingValue(i);
+    if (!UniqueSrc.insert(Incoming))
+      continue;
+    if (related(Incoming, B))
+      return true;
+  }
+
+  // None of the incoming values is related to B.
+  return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
+///
+/// This walks the transitive users of P with a worklist. A store whose
+/// stored-value operand (operand 0) is the tracked value yields true; stores
+/// that merely write through the pointer and call users are skipped; all
+/// other users are followed in turn.
+static bool isStoredObjCPointer(const Value *P) {
+  // Visited guards against enqueueing the same value twice.
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Worklist;
+  Worklist.push_back(P);
+  Visited.insert(P);
+  do {
+    P = Worklist.pop_back_val();
+    for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+         UI != UE; ++UI) {
+      const User *Ur = *UI;
+      if (isa<StoreInst>(Ur)) {
+        if (UI.getOperandNo() == 0)
+          // The pointer is stored.
+          return true;
+        // The pointer is stored through.
+        continue;
+      }
+      if (isa<CallInst>(Ur))
+        // The pointer is passed as an argument, ignore this.
+        continue;
+      // NOTE(review): this tests P (the value whose uses are being walked)
+      // rather than the user Ur -- confirm that this is intended.
+      if (isa<PtrToIntInst>(P))
+        // Assume the worst.
+        return true;
+      if (Visited.insert(Ur))
+        Worklist.push_back(Ur);
+    }
+  } while (!Worklist.empty());
+
+  // Everything checked out.
+  return false;
+}
+
+/// relatedCheck - The uncached worker behind related(). Returns false only
+/// when A and B can be shown to be unrelated; otherwise answers true.
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+  // Skip past provenance pass-throughs.
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Identical values are trivially related.
+  if (A == B)
+    return true;
+
+  // Ask regular AliasAnalysis, for a first approximation. Only a MayAlias
+  // answer requires further investigation.
+  const AliasAnalysis::AliasResult AR = AA->alias(A, B);
+  if (AR == AliasAnalysis::NoAlias)
+    return false;
+  if (AR == AliasAnalysis::MustAlias || AR == AliasAnalysis::PartialAlias)
+    return true;
+
+  const bool AIsIdentified = IsObjCIdentifiedObject(A);
+  const bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+  // An ObjC-Identified object can't alias a load if it is never locally
+  // stored.
+  if (AIsIdentified && BIsIdentified) {
+    // If both pointers have provenance, they can be directly compared.
+    if (A != B)
+      return false;
+  } else if (AIsIdentified) {
+    if (isa<LoadInst>(B))
+      return isStoredObjCPointer(A);
+  } else if (BIsIdentified && isa<LoadInst>(A)) {
+    return isStoredObjCPointer(B);
+  }
+
+  // Special handling for PHI and Select; PHIs take priority, and A is
+  // examined before B.
+  if (const PHINode *PN = dyn_cast<PHINode>(A))
+    return relatedPHI(PN, B);
+  if (const PHINode *PN = dyn_cast<PHINode>(B))
+    return relatedPHI(PN, A);
+  if (const SelectInst *S = dyn_cast<SelectInst>(A))
+    return relatedSelect(S, B);
+  if (const SelectInst *S = dyn_cast<SelectInst>(B))
+    return relatedSelect(S, A);
+
+  // Nothing proved the values unrelated; be conservative.
+  return true;
+}
+
+/// related - Memoizing front end for relatedCheck. The pair is first put
+/// into a canonical order so (A, B) and (B, A) share one cache entry.
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+  if (B < A) std::swap(A, B);
+  const ValuePairTy Key(A, B);
+
+  // Seed the cache with a conservative "true". If an entry already exists
+  // we have the answer (or are inside a recursive query for the same pair,
+  // in which case the conservative placeholder is what gets returned).
+  std::pair<CachedResultsTy::iterator, bool> Inserted =
+    CachedResults.insert(std::make_pair(Key, true));
+  if (!Inserted.second)
+    return Inserted.first->second;
+
+  // Compute the real answer and overwrite the placeholder. The entry is
+  // looked up again by key rather than through the iterator obtained above,
+  // since relatedCheck may add further entries to the map in the meantime.
+  const bool Result = relatedCheck(A, B);
+  CachedResults[Key] = Result;
+  return Result;
+}
+
+namespace {
+  // Sequence - A sequence of states that a pointer may go through in which an
+  // objc_retain and objc_release are actually needed.
+  //
+  // The declaration order of the enumerators matters: MergeSeqs compares
+  // Sequence values with '>' to put its two operands into a canonical order.
+  enum Sequence {
+    S_None,
+    S_Retain,         ///< objc_retain(x)
+    S_CanRelease,     ///< foo(x) -- x could possibly see a ref count decrement
+    S_Use,            ///< any use of x
+    S_Stop,           ///< like S_Release, but code motion is stopped
+    S_Release,        ///< objc_release(x)
+    S_MovableRelease  ///< objc_release(x), !clang.imprecise_release
+  };
+}
+
+/// MergeSeqs - Combine the sequence states reaching a point along two
+/// different paths. TopDown selects which direction of traversal the states
+/// belong to. Returns S_None when the two states cannot be reconciled.
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+  // The easy cases.
+  if (A == B)
+    return A;
+  if (A == S_None || B == S_None)
+    return S_None;
+
+  // From here on A != B. Order the two states by their position in the
+  // enum. Note that we can't merge S_CanRelease and S_Use.
+  const Sequence Lo = A < B ? A : B;
+  const Sequence Hi = A < B ? B : A;
+
+  if (TopDown) {
+    // Choose the side which is further along in the sequence.
+    if (Lo == S_Retain && (Hi == S_CanRelease || Hi == S_Use))
+      return Hi;
+    return S_None;
+  }
+
+  const bool HiIsRelease = Hi == S_Release || Hi == S_MovableRelease;
+
+  // Choose the side which is further along in the sequence.
+  if ((Lo == S_Use || Lo == S_CanRelease) && (HiIsRelease || Hi == S_Stop))
+    return Lo;
+
+  // If both sides are releases, choose the more conservative one.
+  if (Lo == S_Stop && HiIsRelease)
+    return Lo;
+  if (Lo == S_Release && Hi == S_MovableRelease)
+    return Lo;
+
+  return S_None;
+}
+
+namespace {
+  /// RRInfo - Unidirectional information about either a
+  /// retain-decrement-use-release sequence or release-use-decrement-retain
+  /// reverse sequence.
+  struct RRInfo {
+    /// KnownIncremented - After an objc_retain, the reference count of the
+    /// referenced object is known to be positive. Similarly, before an
+    /// objc_release, the reference count of the referenced object is known to
+    /// be positive. If there are retain-release pairs in code regions where the
+    /// retain count is known to be positive, they can be eliminated, regardless
+    /// of any side effects between them.
+    bool KnownIncremented;
+
+    /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+    /// opposed to objc_retain calls).
+    bool IsRetainBlock;
+
+    /// IsTailCallRelease - True if the objc_release calls are all marked
+    /// with the "tail" keyword.
+    bool IsTailCallRelease;
+
+    /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+    /// a clang.imprecise_release tag, this is the metadata tag.
+    MDNode *ReleaseMetadata;
+
+    /// Calls - For a top-down sequence, the set of objc_retains or
+    /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+    SmallPtrSet<Instruction *, 2> Calls;
+
+    /// ReverseInsertPts - The set of optimal insert positions for
+    /// moving calls in the opposite sequence.
+    SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+    /// Construct with all flags false, no metadata and empty sets.
+    RRInfo() :
+      KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+      ReleaseMetadata(0) {}
+
+    /// clear - Reset every member to its freshly-constructed state.
+    void clear();
+  };
+}
+
+/// clear - Return this RRInfo to its freshly-constructed state: empty call
+/// and insertion-point sets, no release metadata, and all flags false.
+void RRInfo::clear() {
+  // Sets first...
+  Calls.clear();
+  ReverseInsertPts.clear();
+
+  // ...then the scalar members.
+  ReleaseMetadata = 0;
+  IsTailCallRelease = false;
+  IsRetainBlock = false;
+  KnownIncremented = false;
+}
+
+namespace {
+  /// PtrState - This class summarizes several per-pointer runtime properties
+  /// which are propagated through the flow graph.
+  class PtrState {
+    /// RefCount - The known minimum number of reference count increments.
+    unsigned RefCount;
+
+    /// Seq - The current position in the sequence.
+    Sequence Seq;
+
+  public:
+    /// RRI - Unidirectional information about the current sequence.
+    /// TODO: Encapsulate this better.
+    RRInfo RRI;
+
+    /// Start with no known increments and no sequence in progress.
+    PtrState() : RefCount(0), Seq(S_None) {}
+
+    /// IncrementRefCount - Add one known increment, saturating at UINT_MAX
+    /// instead of wrapping around.
+    void IncrementRefCount() {
+      if (RefCount != UINT_MAX) ++RefCount;
+    }
+
+    /// DecrementRefCount - Remove one known increment, saturating at zero.
+    void DecrementRefCount() {
+      if (RefCount != 0) --RefCount;
+    }
+
+    /// ClearRefCount - Forget all known increments.
+    void ClearRefCount() {
+      RefCount = 0;
+    }
+
+    /// IsKnownIncremented - True if at least one increment is known.
+    bool IsKnownIncremented() const {
+      return RefCount > 0;
+    }
+
+    /// SetSeq - Unconditionally set the sequence position.
+    void SetSeq(Sequence NewSeq) {
+      Seq = NewSeq;
+    }
+
+    /// SetSeqToRelease - Move the sequence to a release state. M is the
+    /// release metadata to record, or null for none.
+    void SetSeqToRelease(MDNode *M) {
+      if (Seq == S_None || Seq == S_Use) {
+        // Fresh release: movable only if metadata was supplied.
+        Seq = M ? S_MovableRelease : S_Release;
+        RRI.ReleaseMetadata = M;
+      } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
+        // The state is not an S_MovableRelease carrying this same metadata:
+        // fall back to a plain release without metadata.
+        Seq = S_Release;
+        RRI.ReleaseMetadata = 0;
+      }
+    }
+
+    /// GetSeq - Return the current sequence position.
+    Sequence GetSeq() const {
+      return Seq;
+    }
+
+    /// ClearSequenceProgress - Drop the sequence state and the associated
+    /// RRInfo. The reference count is left untouched.
+    void ClearSequenceProgress() {
+      Seq = S_None;
+      RRI.clear();
+    }
+
+    /// Merge - Combine this state with the state from another path.
+    void Merge(const PtrState &Other, bool TopDown);
+  };
+}
+
+/// Merge - Fold the state arriving along another path into this one.
+/// TopDown is forwarded to MergeSeqs to select the merge direction.
+void PtrState::Merge(const PtrState &Other, bool TopDown) {
+  // Only the increments known on both paths remain known.
+  RefCount = std::min(RefCount, Other.RefCount);
+
+  // We can't merge a plain objc_retain with an objc_retainBlock; in that
+  // case the sequence is abandoned regardless of what the states are.
+  if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+    Seq = S_None;
+  else
+    Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+
+  if (Seq == S_None) {
+    RRI.clear();
+    return;
+  }
+
+  // Conservatively merge the ReleaseMetadata information.
+  if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+    RRI.ReleaseMetadata = 0;
+
+  // Flags survive only if they hold on both paths; the call and insertion
+  // point sets are unioned.
+  RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+  RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+  RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+  RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
+                              Other.RRI.ReverseInsertPts.end());
+}
+
+namespace {
+  /// BBState - Per-BasicBlock state.
+  class BBState {
+    /// TopDownPathCount - The number of unique control paths from the entry
+    /// which can reach this block.
+    unsigned TopDownPathCount;
+
+    /// BottomUpPathCount - The number of unique control paths to exits
+    /// from this block.
+    unsigned BottomUpPathCount;
+
+    /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
+    typedef MapVector<const Value *, PtrState> MapTy;
+
+    /// PerPtrTopDown - The top-down traversal uses this to record information
+    /// known about a pointer at the bottom of each block.
+    MapTy PerPtrTopDown;
+
+    /// PerPtrBottomUp - The bottom-up traversal uses this to record information
+    /// known about a pointer at the top of each block.
+    MapTy PerPtrBottomUp;
+
+  public:
+    BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+    typedef MapTy::iterator ptr_iterator;
+    typedef MapTy::const_iterator ptr_const_iterator;
+
+    // Iteration over the top-down per-pointer states.
+    ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+    ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+    ptr_const_iterator top_down_ptr_begin() const {
+      return PerPtrTopDown.begin();
+    }
+    ptr_const_iterator top_down_ptr_end() const {
+      return PerPtrTopDown.end();
+    }
+
+    // Iteration over the bottom-up per-pointer states.
+    ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+    ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+    ptr_const_iterator bottom_up_ptr_begin() const {
+      return PerPtrBottomUp.begin();
+    }
+    ptr_const_iterator bottom_up_ptr_end() const {
+      return PerPtrBottomUp.end();
+    }
+
+    /// SetAsEntry - Mark this block as being an entry block, which has one
+    /// path from the entry by definition.
+    void SetAsEntry() { TopDownPathCount = 1; }
+
+    /// SetAsExit - Mark this block as being an exit block, which has one
+    /// path to an exit by definition.
+    void SetAsExit() { BottomUpPathCount = 1; }
+
+    /// getPtrTopDownState - Return the top-down state for Arg, creating a
+    /// default-constructed entry if none exists yet.
+    PtrState &getPtrTopDownState(const Value *Arg) {
+      return PerPtrTopDown[Arg];
+    }
+
+    /// getPtrBottomUpState - Return the bottom-up state for Arg, creating a
+    /// default-constructed entry if none exists yet.
+    PtrState &getPtrBottomUpState(const Value *Arg) {
+      return PerPtrBottomUp[Arg];
+    }
+
+    /// clearBottomUpPointers - Discard all bottom-up per-pointer state. The
+    /// top-down state is left untouched.
+    void clearBottomUpPointers() {
+      PerPtrBottomUp.clear();
+    }
+
+    /// clearTopDownPointers - Discard all top-down per-pointer state. The
+    /// bottom-up state is left untouched.
+    void clearTopDownPointers() {
+      PerPtrTopDown.clear();
+    }
+
+    void InitFromPred(const BBState &Other);
+    void InitFromSucc(const BBState &Other);
+    void MergePred(const BBState &Other);
+    void MergeSucc(const BBState &Other);
+
+    /// GetAllPathCount - Return the number of possible unique paths from an
+    /// entry to an exit which pass through this block. This is only valid
+    /// after both the top-down and bottom-up traversals are complete.
+    unsigned GetAllPathCount() const {
+      return TopDownPathCount * BottomUpPathCount;
+    }
+  };
+}
+
+/// InitFromPred - Seed this block's top-down state (path count and
+/// per-pointer map) with a copy of a predecessor's.
+void BBState::InitFromPred(const BBState &Other) {
+  TopDownPathCount = Other.TopDownPathCount;
+  PerPtrTopDown = Other.PerPtrTopDown;
+}
+
+/// InitFromSucc - Seed this block's bottom-up state (path count and
+/// per-pointer map) with a copy of a successor's.
+void BBState::InitFromSucc(const BBState &Other) {
+  BottomUpPathCount = Other.BottomUpPathCount;
+  PerPtrBottomUp = Other.PerPtrBottomUp;
+}
+
+/// MergePred - The top-down traversal uses this to fold the state of one
+/// more predecessor into the initial state being formed for a new block.
+void BBState::MergePred(const BBState &Other) {
+  // Other.TopDownPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  TopDownPathCount += Other.TopDownPathCount;
+
+  // Walk the other set. A key we already have is merged with the other
+  // side's state; a key we lack is copied in and then merged with an empty
+  // state.
+  for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+       ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+    std::pair<ptr_iterator, bool> Inserted = PerPtrTopDown.insert(*MI);
+    PtrState &Mine = Inserted.first->second;
+    if (Inserted.second)
+      Mine.Merge(PtrState(), /*TopDown=*/true);
+    else
+      Mine.Merge(MI->second, /*TopDown=*/true);
+  }
+
+  // Walk our own set. Any key the other set doesn't have is forced to merge
+  // with an empty state.
+  for (ptr_iterator MI = top_down_ptr_begin(),
+       ME = top_down_ptr_end(); MI != ME; ++MI) {
+    if (Other.PerPtrTopDown.find(MI->first) != Other.PerPtrTopDown.end())
+      continue;
+    MI->second.Merge(PtrState(), /*TopDown=*/true);
+  }
+}
+
+/// MergeSucc - The bottom-up traversal uses this to fold the state of one
+/// more successor into the initial state being formed for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  BottomUpPathCount += Other.BottomUpPathCount;
+
+  // Walk the other set. A key we already have is merged with the other
+  // side's state; a key we lack is copied in and then merged with an empty
+  // state.
+  for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+       ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+    std::pair<ptr_iterator, bool> Inserted = PerPtrBottomUp.insert(*MI);
+    PtrState &Mine = Inserted.first->second;
+    if (Inserted.second)
+      Mine.Merge(PtrState(), /*TopDown=*/false);
+    else
+      Mine.Merge(MI->second, /*TopDown=*/false);
+  }
+
+  // Walk our own set. Any key the other set doesn't have is forced to merge
+  // with an empty state.
+  for (ptr_iterator MI = bottom_up_ptr_begin(),
+       ME = bottom_up_ptr_end(); MI != ME; ++MI) {
+    if (Other.PerPtrBottomUp.find(MI->first) != Other.PerPtrBottomUp.end())
+      continue;
+    MI->second.Merge(PtrState(), /*TopDown=*/false);
+  }
+}
+
+namespace {
+ /// ObjCARCOpt - The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ /// Changed - Set to true by each transformation that modifies the function.
+ bool Changed;
+ /// PA - Provenance analysis, queried (PA.related) to decide whether two
+ /// pointers may refer to related objects.
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// RetainFunc, RetainBlockFunc, RetainRVFunc, ReleaseFunc - Declarations
+ /// for objc_retain, objc_retainBlock, and objc_release. RetainRVFunc is
+ /// presumably objc_retainAutoreleasedReturnValue - TODO confirm against
+ /// doInitialization.
+ Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+ /// RetainRVCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+ *RetainCallee, *AutoreleaseCallee;
+
+ /// UsedInThisFunction - Flags which determine whether each of the
+ /// interesting runtime functions is in fact used in the current function.
+ /// Bit (1 << InstructionClass) is set by OptimizeIndividualCalls.
+ unsigned UsedInThisFunction;
+
+ /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+ /// metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ // Lazy accessors for the runtime function declarations cached in the
+ // *Callee members above; each creates the declaration on first use.
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ // Per-call simplifications that need no additional analysis.
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+ void OptimizeIndividualCalls(Function &F);
+
+ // Per-block state computation for the bottom-up and top-down traversals.
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ // FunctionPass interface.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+// Define the pass ID and register ObjCARCOpt under the name "objc-arc"
+// ("ObjC ARC optimization"), declaring its dependency on
+// ObjCARCAliasAnalysis.
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+/// createObjCARCOptPass - Allocate a new instance of the ARC optimization
+/// pass and return it.
+Pass *llvm::createObjCARCOptPass() {
+  Pass *NewPass = new ObjCARCOpt();
+  return NewPass;
+}
+
+/// getAnalysisUsage - Declare what this pass preserves and which analyses it
+/// requires.
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+  // ARC optimization doesn't currently split critical edges, so the CFG is
+  // left as it was.
+  AU.setPreservesCFG();
+
+  // Required analyses, requested in the same order as before.
+  AU.addRequired<ObjCARCAliasAnalysis>();
+  AU.addRequired<AliasAnalysis>();
+}
+
+/// getRetainRVCallee - Return the declaration of
+/// objc_retainAutoreleasedReturnValue (i8* (i8*), nounwind) in M, creating
+/// it and caching it in RetainRVCallee on first use.
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+  if (!RetainRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list
+    // instead of modifying its receiver, so the result must be kept or the
+    // declaration ends up without nounwind.
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    RetainRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+                             Attributes);
+  }
+  return RetainRVCallee;
+}
+
+/// getAutoreleaseRVCallee - Return the declaration of
+/// objc_autoreleaseReturnValue (i8* (i8*), nounwind) in M, creating it and
+/// caching it in AutoreleaseRVCallee on first use.
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+  if (!AutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list
+    // instead of modifying its receiver, so the result must be kept or the
+    // declaration ends up without nounwind.
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    AutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+                             Attributes);
+  }
+  return AutoreleaseRVCallee;
+}
+
+/// getReleaseCallee - Return the declaration of objc_release
+/// (void (i8*), nounwind) in M, creating it and caching it in ReleaseCallee
+/// on first use.
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+  if (!ReleaseCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr returns the extended list
+    // instead of modifying its receiver, so the result must be kept or the
+    // declaration ends up without nounwind.
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    ReleaseCallee =
+      M->getOrInsertFunction(
+        "objc_release",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return ReleaseCallee;
+}
+
+/// getRetainCallee - Return the declaration of objc_retain
+/// (i8* (i8*), nounwind) in M, creating it and caching it in RetainCallee on
+/// first use.
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+  if (!RetainCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr returns the extended list
+    // instead of modifying its receiver, so the result must be kept or the
+    // declaration ends up without nounwind.
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    // The return type is the same i8* as the single parameter.
+    RetainCallee =
+      M->getOrInsertFunction(
+        "objc_retain",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return RetainCallee;
+}
+
+/// getAutoreleaseCallee - Return the declaration of objc_autorelease
+/// (i8* (i8*), nounwind) in M, creating it and caching it in
+/// AutoreleaseCallee on first use.
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+  if (!AutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr returns the extended list
+    // instead of modifying its receiver, so the result must be kept or the
+    // declaration ends up without nounwind.
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    // The return type is the same i8* as the single parameter.
+    AutoreleaseCallee =
+      M->getOrInsertFunction(
+        "objc_autorelease",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return AutoreleaseCallee;
+}
+
+/// CanAlterRefCount - Return true if Inst may modify (in either direction)
+/// the reference count of the object that Ptr points to.
+static bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+                 ProvenanceAnalysis &PA, InstructionClass Class) {
+  // Autoreleases and plain users never directly modify a reference count.
+  if (Class == IC_Autorelease || Class == IC_AutoreleaseRV ||
+      Class == IC_User)
+    return false;
+
+  ImmutableCallSite CS = static_cast<const Value *>(Inst);
+  assert(CS && "Only calls can alter reference counts!");
+
+  // Ask AliasAnalysis what the call may do to memory.
+  AliasAnalysis::ModRefBehavior Behavior = PA.getAA()->getModRefBehavior(CS);
+
+  // A call that only reads memory is treated as harmless.
+  if (AliasAnalysis::onlyReadsMemory(Behavior))
+    return false;
+
+  // Without a guarantee that only argument pointees are touched, assume the
+  // worst.
+  if (!AliasAnalysis::onlyAccessesArgPointees(Behavior))
+    return true;
+
+  // Only argument pointees are accessed, so the call matters only if one of
+  // its arguments may be related to Ptr.
+  ImmutableCallSite::arg_iterator AI = CS.arg_begin();
+  ImmutableCallSite::arg_iterator AE = CS.arg_end();
+  for (; AI != AE; ++AI) {
+    const Value *ArgOp = *AI;
+    if (!IsPotentialUse(ArgOp))
+      continue;
+    if (PA.related(Ptr, ArgOp))
+      return true;
+  }
+  return false;
+}
+
+/// CanUse - Return true if Inst may "use" the object Ptr points to in a way
+/// that requires its reference count to be positive.
+static bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+       InstructionClass Class) {
+  // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc
+  // pointers.
+  if (Class == IC_Call)
+    return false;
+
+  // Some instructions take pointer operands without those operands being
+  // "uses"; handle them before the generic operand scan.
+  if (const ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) {
+    // Comparing a pointer with null, or any other constant, isn't really a
+    // use: neither what the pointer points to nor the values of any other
+    // dynamic reference-counted pointers matter. Otherwise fall through to
+    // the generic scan below.
+    if (!IsPotentialUse(Cmp->getOperand(1)))
+      return false;
+  } else {
+    ImmutableCallSite CS = static_cast<const Value *>(Inst);
+    if (CS) {
+      // For calls, look only at the arguments, not at the callee operand.
+      ImmutableCallSite::arg_iterator AI = CS.arg_begin();
+      ImmutableCallSite::arg_iterator AE = CS.arg_end();
+      for (; AI != AE; ++AI) {
+        const Value *ArgOp = *AI;
+        if (IsPotentialUse(ArgOp) && PA.related(Ptr, ArgOp))
+          return true;
+      }
+      return false;
+    }
+
+    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
+      // For stores only the address matters, not the stored value.
+      const Value *Addr = GetUnderlyingObjCPtr(Store->getPointerOperand());
+      // If we can't tell what the underlying object was, assume there is a
+      // dependence.
+      if (!IsPotentialUse(Addr))
+        return false;
+      return PA.related(Addr, Ptr);
+    }
+  }
+
+  // Generic case: any operand that may be related to Ptr counts as a use.
+  User::const_op_iterator OI = Inst->op_begin();
+  User::const_op_iterator OE = Inst->op_end();
+  for (; OI != OE; ++OI) {
+    const Value *Operand = *OI;
+    if (IsPotentialUse(Operand) && PA.related(Ptr, Operand))
+      return true;
+  }
+  return false;
+}
+
+/// CanInterruptRV - Return true if an instruction of the given class can
+/// autorelease any pointer or cause an autoreleasepool pop.
+static bool
+CanInterruptRV(InstructionClass Class) {
+  return Class == IC_AutoreleasepoolPop ||
+         Class == IC_CallOrUser ||
+         Class == IC_Call ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_FusedRetainAutorelease ||
+         Class == IC_FusedRetainAutoreleaseRV;
+}
+
+namespace {
+ /// DependenceKind - There are several kinds of dependence-like concepts in
+ /// use here. Depends() dispatches on these values.
+ enum DependenceKind {
+ // Depends on instructions that may use the pointer (checked via CanUse).
+ NeedsPositiveRetainCount,
+ // Depends on instructions that may alter the pointer's reference count
+ // (checked via CanAlterRefCount; autoreleasepool pops always count).
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+ };
+}
+
+/// Depends - Decide whether Inst constitutes a dependence of kind Flavor on
+/// the pointer Arg. Only the dependencies that matter for removing pairs of
+/// calls are considered.
+static bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+        ProvenanceAnalysis &PA) {
+  // Reaching the definition of Arg always ends the search.
+  if (Inst == Arg)
+    return true;
+
+  switch (Flavor) {
+  case NeedsPositiveRetainCount: {
+    const InstructionClass Class = GetInstructionClass(Inst);
+    if (Class == IC_AutoreleasepoolPop || Class == IC_AutoreleasepoolPush ||
+        Class == IC_None)
+      return false;
+    return CanUse(Inst, Arg, PA, Class);
+  }
+
+  case CanChangeRetainCount: {
+    const InstructionClass Class = GetInstructionClass(Inst);
+    // Conservatively assume a pool pop can decrement any count.
+    if (Class == IC_AutoreleasepoolPop)
+      return true;
+    if (Class == IC_AutoreleasepoolPush || Class == IC_None)
+      return false;
+    return CanAlterRefCount(Inst, Arg, PA, Class);
+  }
+
+  case RetainAutoreleaseDep: {
+    const InstructionClass Class = GetBasicInstructionClass(Inst);
+    // Don't merge an objc_autorelease with an objc_retain inside a different
+    // autoreleasepool scope.
+    if (Class == IC_AutoreleasepoolPop)
+      return true;
+    // A retain of the same pointer is the merge candidate we look for.
+    if (Class == IC_Retain || Class == IC_RetainRV)
+      return GetObjCArg(Inst) == Arg;
+    // Nothing else matters for objc_retainAutorelease formation.
+    return false;
+  }
+
+  case RetainAutoreleaseRVDep: {
+    const InstructionClass Class = GetBasicInstructionClass(Inst);
+    // A retain of the same pointer is the merge candidate we look for.
+    if (Class == IC_Retain || Class == IC_RetainRV)
+      return GetObjCArg(Inst) == Arg;
+    // Anything that can autorelease interrupts
+    // retainAutoreleaseReturnValue formation.
+    return CanInterruptRV(Class);
+  }
+
+  case RetainRVDep:
+    return CanInterruptRV(GetBasicInstructionClass(Inst));
+  }
+
+  llvm_unreachable("Invalid dependence flavor");
+  return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+///
+/// On return DependingInstructions holds, for every backward path walked, the
+/// first instruction for which Depends(Flavor, ...) is true. It additionally
+/// holds a null entry if some path reached the function entry, and the
+/// sentinel value (Instruction *)-1 if some visited block other than StartBB
+/// has a successor that is neither StartBB nor visited. Visited receives the
+/// predecessor blocks that were walked.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ // Each worklist entry is a block plus the position to scan backward from.
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ // Ran off the top of the block without finding a dependence: continue
+ // into the predecessors.
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInstructions.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ // Each predecessor is scanned at most once, starting from its end.
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ // Step back one instruction; the first dependence found ends this path.
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInstructions.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ // An edge leaving the visited region without going to StartBB.
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
+
+/// isNullOrUndef - Return true if V is a null pointer constant or an undef
+/// value.
+static bool isNullOrUndef(const Value *V) {
+  if (isa<ConstantPointerNull>(V))
+    return true;
+  return isa<UndefValue>(V);
+}
+
+/// isNoopInstruction - Return true if I is a bitcast or a getelementptr
+/// whose indices are all zero.
+static bool isNoopInstruction(const Instruction *I) {
+  if (isa<BitCastInst>(I))
+    return true;
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    return GEP->hasAllZeroIndices();
+  return false;
+}
+
+/// OptimizeRetainCall - Rewrite an objc_retain whose operand is the result of
+/// the call right before it (ignoring no-op instructions) into
+/// objc_retainAutoreleasedReturnValue.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  CallSite CS(GetObjCArg(Retain));
+  Instruction *Call = CS.getInstruction();
+
+  // The operand must come from a call in the same block as the retain.
+  if (!Call || Call->getParent() != Retain->getParent())
+    return;
+
+  // Step past the call and any no-op instructions after it; the retain must
+  // be the very next thing.
+  BasicBlock::iterator Pos = Call;
+  do
+    ++Pos;
+  while (isNoopInstruction(Pos));
+  if (&*Pos != Retain)
+    return;
+
+  // Turn it to an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+  CallInst *RetainCall = cast<CallInst>(Retain);
+  RetainCall->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
+
+/// OptimizeRetainRVCall - Simplify an objc_retainAutoreleasedReturnValue
+/// call. If its operand is the result of the immediately preceding call it
+/// is left alone. If it directly follows an objc_autoreleaseReturnValue on
+/// the same pointer, both calls are deleted and true is returned. Otherwise
+/// it is demoted to a plain objc_retain. Returns false unless the pair was
+/// deleted.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  Value *Arg = GetObjCArg(RetainRV);
+  BasicBlock *Parent = RetainRV->getParent();
+
+  // Check for the argument being from an immediately preceding call.
+  CallSite CS(Arg);
+  Instruction *Call = CS.getInstruction();
+  if (Call && Call->getParent() == Parent) {
+    BasicBlock::iterator Fwd = Call;
+    do
+      ++Fwd;
+    while (isNoopInstruction(Fwd));
+    if (&*Fwd == RetainRV)
+      return false;
+  }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
+  BasicBlock::iterator Back = RetainRV, First = Parent->begin();
+  if (Back != First) {
+    // Walk backward over no-op instructions to the nearest real one.
+    --Back;
+    while (Back != First && isNoopInstruction(Back))
+      --Back;
+
+    bool IsMatchingAutoreleaseRV =
+      GetBasicInstructionClass(Back) == IC_AutoreleaseRV &&
+      GetObjCArg(Back) == Arg;
+    if (IsMatchingAutoreleaseRV) {
+      Changed = true;
+      ++NumPeeps;
+      EraseInstruction(Back);
+      EraseInstruction(RetainRV);
+      return true;
+    }
+  }
+
+  // Turn it to a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+  CallInst *RVCall = cast<CallInst>(RetainRV);
+  RVCall->setCalledFunction(getRetainCallee(F.getParent()));
+  return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Demote objc_autoreleaseReturnValue to
+/// objc_autorelease when its operand is neither returned nor passed to an
+/// objc_retainAutoreleasedReturnValue call.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+  // Scan the users of the pointer for a return or a retainRV; either one
+  // means the call is kept as it is.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  Value::const_use_iterator UI = Ptr->use_begin();
+  Value::const_use_iterator UE = Ptr->use_end();
+  for (; UI != UE; ++UI) {
+    const User *U = *UI;
+    if (isa<ReturnInst>(U))
+      return;
+    if (GetBasicInstructionClass(U) == IC_RetainRV)
+      return;
+  }
+
+  Changed = true;
+  ++NumPeeps;
+  CallInst *RVCall = cast<CallInst>(AutoreleaseRV);
+  RVCall->setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis. Also recomputes
+/// UsedInThisFunction, the bitmask of instruction classes present in F.
+/// Every transformation performed here sets Changed.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  // Reset all the flags in preparation for recomputing them.
+  UsedInThisFunction = 0;
+
+  // Visit all objc_* calls in F.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    // Advance the iterator first, because Inst may be erased below.
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    switch (Class) {
+    default: break;
+
+    // Delete no-op casts. These function calls have special semantics, but
+    // the semantics are entirely implemented via lowering in the front-end,
+    // so by the time they reach the optimizer, they are just no-op calls
+    // which return their argument.
+    //
+    // There are gray areas here, as the ability to cast reference-counted
+    // pointers to raw void* and back allows code to break ARC assumptions,
+    // however these are currently considered to be unimportant.
+    case IC_NoopCast:
+      Changed = true;
+      ++NumNoops;
+      EraseInstruction(Inst);
+      continue;
+
+    // If the pointer-to-weak-pointer is null, it's undefined behavior.
+    case IC_StoreWeak:
+    case IC_LoadWeak:
+    case IC_LoadWeakRetained:
+    case IC_InitWeak:
+    case IC_DestroyWeak: {
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(0))) {
+        // The call is replaced by a store of undef to null, so the function
+        // is modified and that must be reported.
+        Changed = true;
+        const Type *Ty = CI->getArgOperand(0)->getType();
+        new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+                      Constant::getNullValue(Ty),
+                      CI);
+        CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+        CI->eraseFromParent();
+        continue;
+      }
+      break;
+    }
+    case IC_CopyWeak:
+    case IC_MoveWeak: {
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(0)) ||
+          isNullOrUndef(CI->getArgOperand(1))) {
+        // The call is replaced by a store of undef to null, so the function
+        // is modified and that must be reported.
+        Changed = true;
+        const Type *Ty = CI->getArgOperand(0)->getType();
+        new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+                      Constant::getNullValue(Ty),
+                      CI);
+        CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+        CI->eraseFromParent();
+        continue;
+      }
+      break;
+    }
+    case IC_Retain:
+      OptimizeRetainCall(F, Inst);
+      break;
+    case IC_RetainRV:
+      // A true result means the call was deleted; nothing more to do.
+      if (OptimizeRetainRVCall(F, Inst))
+        continue;
+      break;
+    case IC_AutoreleaseRV:
+      OptimizeAutoreleaseRVCall(F, Inst);
+      break;
+    }
+
+    // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+    if (IsAutorelease(Class) && Inst->use_empty()) {
+      CallInst *Call = cast<CallInst>(Inst);
+      const Value *Arg = Call->getArgOperand(0);
+      Arg = FindSingleUseIdentifiedObject(Arg);
+      if (Arg) {
+        Changed = true;
+        ++NumAutoreleases;
+
+        // Create the declaration lazily.
+        LLVMContext &C = Inst->getContext();
+        CallInst *NewCall =
+          CallInst::Create(getReleaseCallee(F.getParent()),
+                           Call->getArgOperand(0), "", Call);
+        NewCall->setMetadata(ImpreciseReleaseMDKind,
+                             MDNode::get(C, ArrayRef<Value *>()));
+        EraseInstruction(Call);
+        // Continue processing with the new release in place of the old call.
+        Inst = NewCall;
+        Class = IC_Release;
+      }
+    }
+
+    // For functions which can never be passed stack arguments, add
+    // a tail keyword.
+    if (IsAlwaysTail(Class)) {
+      Changed = true;
+      cast<CallInst>(Inst)->setTailCall();
+    }
+
+    // Set nounwind as needed.
+    if (IsNoThrow(Class)) {
+      Changed = true;
+      cast<CallInst>(Inst)->setDoesNotThrow();
+    }
+
+    if (!IsNoopOnNull(Class)) {
+      UsedInThisFunction |= 1 << Class;
+      continue;
+    }
+
+    const Value *Arg = GetObjCArg(Inst);
+
+    // ARC calls with null are no-ops. Delete them.
+    if (isNullOrUndef(Arg)) {
+      Changed = true;
+      ++NumNoops;
+      EraseInstruction(Inst);
+      continue;
+    }
+
+    // Keep track of which of retain, release, autorelease, and retain_block
+    // are actually present in this function.
+    UsedInThisFunction |= 1 << Class;
+
+    // If Arg is a PHI, and one or more incoming values to the
+    // PHI are null, and the call is control-equivalent to the PHI, and there
+    // are no relevant side effects between the PHI and the call, the call
+    // could be pushed up to just those paths with non-null incoming values.
+    // For now, don't bother splitting critical edges for this.
+    SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+    Worklist.push_back(std::make_pair(Inst, Arg));
+    do {
+      std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+      Inst = Pair.first;
+      Arg = Pair.second;
+
+      const PHINode *PN = dyn_cast<PHINode>(Arg);
+      if (!PN) continue;
+
+      // Determine if the PHI has any null operands, or any incoming
+      // critical edges.
+      bool HasNull = false;
+      bool HasCriticalEdges = false;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        Value *Incoming =
+          StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+        if (isNullOrUndef(Incoming))
+          HasNull = true;
+        else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+                   .getNumSuccessors() != 1) {
+          HasCriticalEdges = true;
+          break;
+        }
+      }
+      // If we have null operands and no critical edges, optimize.
+      if (!HasCriticalEdges && HasNull) {
+        SmallPtrSet<Instruction *, 4> DependingInstructions;
+        SmallPtrSet<const BasicBlock *, 4> Visited;
+
+        // Check that there is nothing that cares about the reference
+        // count between the call and the phi.
+        FindDependencies(NeedsPositiveRetainCount, Arg,
+                         Inst->getParent(), Inst,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() == 1 &&
+            *DependingInstructions.begin() == PN) {
+          Changed = true;
+          ++NumPartialNoops;
+          // Clone the call into each predecessor that has a non-null value.
+          CallInst *CInst = cast<CallInst>(Inst);
+          const Type *ParamTy = CInst->getArgOperand(0)->getType();
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+            Value *Incoming =
+              StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+            if (!isNullOrUndef(Incoming)) {
+              CallInst *Clone = cast<CallInst>(CInst->clone());
+              Value *Op = PN->getIncomingValue(i);
+              Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+              if (Op->getType() != ParamTy)
+                Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+              Clone->setArgOperand(0, Op);
+              Clone->insertBefore(InsertPos);
+              // The clone's operand may itself be a PHI; revisit it.
+              Worklist.push_back(std::make_pair(Clone, Incoming));
+            }
+          }
+          // Erase the original call.
+          EraseInstruction(CInst);
+          continue;
+        }
+      }
+    } while (!Worklist.empty());
+  }
+}
+
+/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
+/// control flow, or other CFG structures where moving code across the edge
+/// would result in it being executed more.
+///
+/// For every pointer whose top-down state at the end of BB is S_Use or
+/// S_CanRelease, the bottom-up states of BB's successors are inspected, and
+/// the pointer's top-down sequence progress in MyStates is cleared when they
+/// are incompatible.
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+                               DenseMap<const BasicBlock *, BBState> &BBStates,
+                               BBState &MyStates) const {
+  // If any top-down local-use or possible-dec has a succ which is earlier in
+  // the sequence, forget it.
+  for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+       E = MyStates.top_down_ptr_end(); I != E; ++I)
+    switch (I->second.GetSeq()) {
+    default: break;
+    case S_Use: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+        case S_CanRelease:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_Use:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      // Do not fall through: the S_CanRelease rules below apply only to
+      // pointers that are actually in the S_CanRelease state.
+      break;
+    }
+    case S_CanRelease: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_CanRelease:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+        case S_Use:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      break;
+    }
+    }
+}
+
+/// VisitBottomUp - Compute the bottom-up state of BB. The initial state is
+/// formed from the already-computed states of BB's successors, then the
+/// instructions are scanned from last to first, updating the per-pointer
+/// sequence state. Retains (other than IC_RetainRV) that end a tracked
+/// sequence are recorded in Retains together with the collected RRInfo.
+/// Returns true if a release was seen for a pointer that was already in a
+/// release state (nesting).
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block. Self-loops and successors without a computed
+ // state are skipped; the first usable successor initializes the state and
+ // the remaining ones are merged into it.
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ succ_const_iterator SI(TI), SE(TI, false);
+ if (SI == SE)
+ MyStates.SetAsExit();
+ else
+ do {
+ const BasicBlock *Succ = *SI++;
+ if (Succ == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromSucc(I->second);
+ while (SI != SE) {
+ Succ = *SI++;
+ if (Succ != BB) {
+ I = BBStates.find(Succ);
+ if (I != BBStates.end())
+ MyStates.MergeSucc(I->second);
+ }
+ }
+ break;
+ } while (SI != SE);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+ InstructionClass Class = GetInstructionClass(Inst);
+ // Arg is the pointer handled directly by the switch below, if any; the
+ // generic per-pointer loop afterwards skips it.
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // If we see two releases in a row on the same pointer, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ // Start a fresh sequence at this release.
+ S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.RRI.clear();
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ // Seq is the state before this instruction; the switches below test it
+ // rather than re-reading S.
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible retains and releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a retain (we're going bottom-up here).
+ S.DecrementRefCount();
+
+ // Check for a release.
+ if (!IsRetain(Class) && Class != IC_RetainBlock)
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ // Move on to the next pointer; the use check is skipped.
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ }
+
+ return NestingDetected;
+}
+
+/// VisitTopDown - Compute the top-down state of BB. The initial state is
+/// formed from the already-computed states of BB's predecessors, then the
+/// instructions are scanned from first to last, updating the per-pointer
+/// sequence state. Releases that end a tracked sequence are recorded in
+/// Releases together with the collected RRInfo. Finally CheckForCFGHazards
+/// is run on the resulting state. Returns true if a retain (other than
+/// IC_RetainRV) was seen for a pointer already in the S_Retain state
+/// (nesting).
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block. Self-loops and predecessors without a computed
+ // state are skipped; the first usable predecessor initializes the state and
+ // the remaining ones are merged into it.
+ const_pred_iterator PI(BB), PE(BB, false);
+ if (PI == PE)
+ MyStates.SetAsEntry();
+ else
+ do {
+ const BasicBlock *Pred = *PI++;
+ if (Pred == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromPred(I->second);
+ while (PI != PE) {
+ Pred = *PI++;
+ if (Pred != BB) {
+ I = BBStates.find(Pred);
+ if (I != BBStates.end())
+ MyStates.MergePred(I->second);
+ }
+ }
+ break;
+ } while (PI != PE);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+ InstructionClass Class = GetInstructionClass(Inst);
+ // Arg is the pointer handled directly by the switch below, if any; the
+ // generic per-pointer loop afterwards skips it.
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ // Start a fresh sequence at this retain.
+ S.SetSeq(S_Retain);
+ S.RRI.clear();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ // Seq is the state before this instruction; the switches below test it
+ // rather than re-reading S.
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (!IsRetain(Class) && Class != IC_RetainBlock &&
+ CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Account for the possible decrement.
+ S.DecrementRefCount();
+
+ // Check for a release.
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Use:
+ case S_Retain:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+ }
+
+ // Drop sequences that cannot safely continue across BB's outgoing edges.
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
+/// Visit - Analyze every block of F in both directions: bottom-up while
+/// walking the function in postorder, then top-down over the same blocks in
+/// reverse postorder. Returns true only if both walks reported nesting.
+bool
+ObjCARCOpt::Visit(Function &F,
+                  DenseMap<const BasicBlock *, BBState> &BBStates,
+                  MapVector<Value *, RRInfo> &Retains,
+                  DenseMap<Value *, RRInfo> &Releases) {
+  // Postorder is used for bottom-up and reverse-postorder for top-down, on
+  // the assumption that loops are well behaved, i.e. they won't repeatedly
+  // call retain on a single pointer without doing a release.
+  SmallVector<BasicBlock *, 8> Blocks;
+  bool NestedBottomUp = false;
+  for (po_iterator<Function *> Cur = po_begin(&F), Last = po_end(&F);
+       Cur != Last; ++Cur) {
+    BasicBlock *Block = *Cur;
+    Blocks.push_back(Block);
+
+    if (VisitBottomUp(Block, BBStates, Retains))
+      NestedBottomUp = true;
+  }
+
+  // Replay the recorded postorder backwards to obtain a reverse-postorder
+  // traversal. ReversePostOrderTraversal is deliberately not used: it would
+  // compute a second full postorder, and the sequence is already at hand.
+  bool NestedTopDown = false;
+  for (unsigned Remaining = Blocks.size(); Remaining != 0; --Remaining)
+    if (VisitTopDown(Blocks[Remaining - 1], BBStates, Releases))
+      NestedTopDown = true;
+
+  return NestedTopDown && NestedBottomUp;
+}
+
+/// MoveCalls - Materialize the placement described by RetainsToMove and
+/// ReleasesToMove for the object Arg.
+///
+/// A new retain call is inserted before every instruction in
+/// ReleasesToMove.ReverseInsertPts, and a new release call is inserted after
+/// every instruction in RetainsToMove.ReverseInsertPts. The original calls in
+/// the two Calls sets are then dropped from the Retains and Releases maps and
+/// appended to DeadInsts; they are not erased from the function here.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ const Type *ArgTy = Arg->getType();
+ // Parameter type taken from the first retain-family declaration that is
+ // non-null; Arg is bitcast to it below whenever its own type differs.
+ const Type *ParamTy =
+ (RetainRVFunc ? RetainRVFunc :
+ RetainFunc ? RetainFunc :
+ RetainBlockFunc)->arg_begin()->getType();
+
+ // Insert the new retain and release calls.
+ // New retains go directly in front of each recorded insertion point.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(RetainsToMove.IsRetainBlock ?
+ RetainBlockFunc : RetainFunc,
+ MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ // Only plain objc_retain calls are marked as tail calls.
+ if (!RetainsToMove.IsRetainBlock)
+ Call->setTailCall();
+ }
+ // New releases go after each recorded last use.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *LastUse = *PI;
+ // Null-terminated list of insertion positions: at most two are filled in,
+ // and the trailing 0 stops the loop below.
+ Instruction *InsertPts[] = { 0, 0, 0 };
+ if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
+ // We can't insert code immediately after an invoke instruction, so
+ // insert code at the beginning of both successor blocks instead.
+ // The invoke's return value isn't available in the unwind block,
+ // but our releases will never depend on it, because they must be
+ // paired with retains from before the invoke.
+ InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+ InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Insert code immediately after the last use.
+ InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+ }
+
+ for (Instruction **I = InsertPts; *I; ++I) {
+ Instruction *InsertPt = *I;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+ }
+ }
+
+ // Delete the original retain and release calls.
+ // Retains are blotted rather than erased from the MapVector; both kinds of
+ // call are queued in DeadInsts for the caller to erase later.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ }
+}
+
+/// PerformCodePlacement - For every retain recorded in Retains, gather the
+/// transitive group of retains and releases it is connected to, check that
+/// both the original calls and the proposed insertion points are balanced
+/// across paths, and if so hand the group to MoveCalls.
+///
+/// BBStates supplies the per-block path counts used for the balance checks.
+/// Retains and Releases map each retain/release call to its RRInfo; entries
+/// for calls that get moved are removed from them by MoveCalls.
+///
+/// Returns the value of AnyPairsCompletelyEliminated, i.e. whether the last
+/// group that was moved needed no replacement calls.
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+                                   &BBStates,
+                                 MapVector<Value *, RRInfo> &Retains,
+                                 DenseMap<Value *, RRInfo> &Releases) {
+  bool AnyPairsCompletelyEliminated = false;
+  RRInfo RetainsToMove;
+  RRInfo ReleasesToMove;
+  SmallVector<Instruction *, 4> NewRetains;
+  SmallVector<Instruction *, 4> NewReleases;
+  SmallVector<Instruction *, 8> DeadInsts;
+
+  for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+       E = Retains.end(); I != E; ) {
+    Value *V = (I++)->first;
+    if (!V) continue; // blotted
+
+    Instruction *Retain = cast<Instruction>(V);
+    Value *Arg = GetObjCArg(Retain);
+
+    // If the object being released is in static or stack storage, we know it's
+    // not being managed by ObjC reference counting, so we can delete pairs
+    // regardless of what possible decrements or uses lie between them.
+    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+    // If a pair happens in a region where it is known that the reference count
+    // is already incremented, we can similarly ignore possible decrements.
+    bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+    // Connect the dots between the top-down-collected RetainsToMove and
+    // bottom-up-collected ReleasesToMove to form sets of related calls.
+    // This is an iterative process so that we connect multiple releases
+    // to multiple retains if needed.
+    // The deltas are unsigned and rely on wrap-around: only whether they end
+    // up at exactly zero matters.
+    unsigned OldDelta = 0;
+    unsigned NewDelta = 0;
+    unsigned OldCount = 0;
+    unsigned NewCount = 0;
+    bool FirstRelease = true;
+    bool FirstRetain = true;
+    NewRetains.push_back(Retain);
+    for (;;) {
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+        Instruction *NewRetain = *NI;
+        MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+        assert(It != Retains.end());
+        const RRInfo &NewRetainRRI = It->second;
+        KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewRetainRRI.Calls.begin(),
+             LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewRetainRelease = *LI;
+          DenseMap<Value *, RRInfo>::const_iterator Jt =
+            Releases.find(NewRetainRelease);
+          // A release with no recorded state cannot be paired; give up on
+          // this whole group.
+          if (Jt == Releases.end())
+            goto next_retain;
+          const RRInfo &NewRetainReleaseRRI = Jt->second;
+          assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+          if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+            OldDelta -=
+              BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+            // Merge the ReleaseMetadata and IsTailCallRelease values.
+            if (FirstRelease) {
+              ReleasesToMove.ReleaseMetadata =
+                NewRetainReleaseRRI.ReleaseMetadata;
+              ReleasesToMove.IsTailCallRelease =
+                NewRetainReleaseRRI.IsTailCallRelease;
+              FirstRelease = false;
+            } else {
+              if (ReleasesToMove.ReleaseMetadata !=
+                    NewRetainReleaseRRI.ReleaseMetadata)
+                ReleasesToMove.ReleaseMetadata = 0;
+              if (ReleasesToMove.IsTailCallRelease !=
+                    NewRetainReleaseRRI.IsTailCallRelease)
+                ReleasesToMove.IsTailCallRelease = false;
+            }
+
+            // Collect the optimal insertion points.
+            if (!KnownSafe)
+              for (SmallPtrSet<Instruction *, 2>::const_iterator
+                   RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                   RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                   RI != RE; ++RI) {
+                Instruction *RIP = *RI;
+                if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                  NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+              }
+            NewReleases.push_back(NewRetainRelease);
+          }
+        }
+      }
+      NewRetains.clear();
+      if (NewReleases.empty()) break;
+
+      // Back the other way.
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+        Instruction *NewRelease = *NI;
+        DenseMap<Value *, RRInfo>::const_iterator It =
+          Releases.find(NewRelease);
+        assert(It != Releases.end());
+        const RRInfo &NewReleaseRRI = It->second;
+        KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewReleaseRRI.Calls.begin(),
+             LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewReleaseRetain = *LI;
+          MapVector<Value *, RRInfo>::const_iterator Jt =
+            Retains.find(NewReleaseRetain);
+          if (Jt == Retains.end())
+            goto next_retain;
+          const RRInfo &NewReleaseRetainRRI = Jt->second;
+          assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+          if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+            unsigned PathCount =
+              BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+            OldDelta += PathCount;
+            OldCount += PathCount;
+
+            // Merge the IsRetainBlock values. The comparison must be made
+            // against RetainsToMove, which is where the first retain's flavor
+            // was recorded; ReleasesToMove.IsRetainBlock is never assigned.
+            if (FirstRetain) {
+              RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+              FirstRetain = false;
+            } else if (RetainsToMove.IsRetainBlock !=
+                       NewReleaseRetainRRI.IsRetainBlock)
+              // It's not possible to merge the sequences if one uses
+              // objc_retain and the other uses objc_retainBlock.
+              goto next_retain;
+
+            // Collect the optimal insertion points.
+            if (!KnownSafe)
+              for (SmallPtrSet<Instruction *, 2>::const_iterator
+                   RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+                   RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+                   RI != RE; ++RI) {
+                Instruction *RIP = *RI;
+                if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+                  PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+                  NewDelta += PathCount;
+                  NewCount += PathCount;
+                }
+              }
+            NewRetains.push_back(NewReleaseRetain);
+          }
+        }
+      }
+      NewReleases.clear();
+      if (NewRetains.empty()) break;
+    }
+
+    // If the pointer is known incremented, we can safely delete the pair
+    // regardless of what's between them.
+    if (KnownIncrementedTD || KnownIncrementedBU) {
+      RetainsToMove.ReverseInsertPts.clear();
+      ReleasesToMove.ReverseInsertPts.clear();
+      NewCount = 0;
+    }
+
+    // Determine whether the original call points are balanced in the retain and
+    // release calls through the program. If not, conservatively don't touch
+    // them.
+    // TODO: It's theoretically possible to do code motion in this case, as
+    // long as the existing imbalances are maintained.
+    if (OldDelta != 0)
+      goto next_retain;
+
+    // Determine whether the new insertion points we computed preserve the
+    // balance of retain and release calls through the program.
+    // TODO: If the fully aggressive solution isn't valid, try to find a
+    // less aggressive solution which is.
+    if (NewDelta != 0)
+      goto next_retain;
+
+    // Ok, everything checks out and we're all set. Let's move some code!
+    Changed = true;
+    // NOTE(review): this assignment overwrites rather than accumulates, so
+    // the returned flag reflects only the most recently moved group.
+    AnyPairsCompletelyEliminated = NewCount == 0;
+    NumRRs += OldCount - NewCount;
+    MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+
+  next_retain:
+    // Reset the scratch state, whether the group was moved or abandoned.
+    NewReleases.clear();
+    NewRetains.clear();
+    RetainsToMove.clear();
+    ReleasesToMove.clear();
+  }
+
+  // Now that we're done moving everything, we can delete the newly dead
+  // instructions, as we no longer need them as insert points.
+  while (!DeadInsts.empty())
+    EraseInstruction(DeadInsts.pop_back_val());
+
+  return AnyPairsCompletelyEliminated;
+}
+
+/// OptimizeWeakCalls - Weak pointer optimizations.
+///
+/// First pass: delete objc_loadWeak calls with no users, and forward an
+/// earlier weak load or weak store/init in the same basic block to a later
+/// objc_loadWeak / objc_loadWeakRetained of a must-aliasing location.
+/// Second pass: for every objc_destroyWeak whose operand is an alloca used
+/// only by initWeak/storeWeak/destroyWeak calls, erase those calls and the
+/// alloca. Sets Changed whenever the function is modified.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+  // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+  // itself because it uses AliasAnalysis and we need to do provenance
+  // queries instead.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+      continue;
+
+    // Delete objc_loadWeak calls with no users. This modifies the function,
+    // so it has to be reported through Changed like every other rewrite.
+    if (Class == IC_LoadWeak && Inst->use_empty()) {
+      Changed = true;
+      Inst->eraseFromParent();
+      continue;
+    }
+
+    // TODO: For now, just look for an earlier available version of this value
+    // within the same block. Theoretically, we could do memdep-style non-local
+    // analysis too, but that would want caching. A better approach would be to
+    // use the technique that EarlyCSE uses.
+    inst_iterator Current = llvm::prior(I);
+    BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+    // Scan backwards from Inst towards the start of its block.
+    for (BasicBlock::iterator B = CurrentBB->begin(),
+                              J = Current.getInstructionIterator();
+         J != B; --J) {
+      Instruction *EarlierInst = &*llvm::prior(J);
+      InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+      switch (EarlierClass) {
+      case IC_LoadWeak:
+      case IC_LoadWeakRetained: {
+        // If this is loading from the same pointer, replace this load's value
+        // with that one.
+        CallInst *Call = cast<CallInst>(Inst);
+        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall);
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_StoreWeak:
+      case IC_InitWeak: {
+        // If this is storing to the same pointer and has the same size etc.
+        // replace this load's value with the stored value.
+        CallInst *Call = cast<CallInst>(Inst);
+        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_MoveWeak:
+      case IC_CopyWeak:
+        // TODO: Grab the copied value.
+        goto clobbered;
+      case IC_AutoreleasepoolPush:
+      case IC_None:
+      case IC_User:
+        // Weak pointers are only modified through the weak entry points
+        // (and arbitrary calls, which could call the weak entry points).
+        break;
+      default:
+        // Anything else could modify the weak pointer.
+        goto clobbered;
+      }
+    }
+  clobbered:;
+  }
+
+  // Then, for each destroyWeak with an alloca operand, check to see if
+  // the alloca and all its users can be zapped.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_DestroyWeak)
+      continue;
+
+    CallInst *Call = cast<CallInst>(Inst);
+    Value *Arg = Call->getArgOperand(0);
+    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+      // Any user other than init/store/destroyWeak blocks the transformation.
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ++UI) {
+        Instruction *UserInst = cast<Instruction>(*UI);
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+        case IC_DestroyWeak:
+          continue;
+        default:
+          goto done;
+        }
+      }
+      Changed = true;
+      // NOTE(review): the erased users may include the instruction the outer
+      // iterator I currently refers to; confirm this cannot happen in practice.
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ) {
+        CallInst *UserInst = cast<CallInst>(*UI++);
+        if (!UserInst->use_empty())
+          UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+        UserInst->eraseFromParent();
+      }
+      Alloca->eraseFromParent();
+    done:;
+    }
+  }
+}
+
+/// OptimizeSequences - Run one analyze-then-transform round over F, looking
+/// for retain/release sequences that can be eliminated. Returns true when
+/// PerformCodePlacement returned true and Visit reported nesting.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+  /// Results of the main flow analysis. Both maps are keyed by Value* rather
+  /// than Instruction* so that they stay valid when code is rewritten and
+  /// calls get replaced by their arguments.
+  DenseMap<Value *, RRInfo> Releases;
+  MapVector<Value *, RRInfo> Retains;
+
+  /// Per-block state for each identified object, filled in during the
+  /// traversal of the function.
+  DenseMap<const BasicBlock *, BBState> BBStates;
+
+  // Analysis phase: walk the CFG and every instruction.
+  const bool SawNesting = Visit(F, BBStates, Retains, Releases);
+
+  // Transformation phase. This always runs, whatever the analysis reported.
+  const bool PlacementResult = PerformCodePlacement(BBStates, Retains, Releases);
+
+  return PlacementResult && SawNesting;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+///    %call = call i8* @something(...)
+///    %2 = call i8* @objc_retain(i8* %call)
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+///
+/// Functions that do not return a pointer are skipped. Changed is set for
+/// either rewrite.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  // Scratch sets shared by every FindDependencies query below; they are
+  // cleared between queries and at the end of each block.
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+    if (!Ret) continue;
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    // The nested scopes keep the gotos to next_block from jumping over
+    // initialized locals.
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass =
+        GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value. This rewrites the call even if the remaining
+        // checks fail, so the modification is recorded right away.
+        if (AutoreleaseClass == IC_Autorelease) {
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          AutoreleaseClass = IC_AutoreleaseRV;
+          Changed = true;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
+          Changed = true;
+          ++NumRets;
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+}
+
+/// doInitialization - Per-module setup. When the optimizations are enabled,
+/// record in Run whether M uses ARC at all; if it does, look up the metadata
+/// kind and runtime declarations used later and reset the lazily created
+/// callees. Always returns false.
+bool ObjCARCOpt::doInitialization(Module &M) {
+  if (EnableARCOpts) {
+    Run = ModuleHasARC(M);
+    if (Run) {
+      // Metadata kind ID used to recognize and tag imprecise releases.
+      ImpreciseReleaseMDKind =
+        M.getContext().getMDKindID("clang.imprecise_release");
+
+      // Existing declarations of objc_retain and friends; each lookup may
+      // yield null when the module does not declare that function.
+      RetainFunc = M.getFunction("objc_retain");
+      RetainBlockFunc = M.getFunction("objc_retainBlock");
+      RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+      ReleaseFunc = M.getFunction("objc_release");
+
+      // Intuitively, objc_retain and others are nocapture, however in
+      // practice they are not, because they return their argument value. And
+      // objc_release calls finalizers.
+
+      // These callees are created on demand, so start them out null.
+      RetainRVCallee = 0;
+      AutoreleaseRVCallee = 0;
+      ReleaseCallee = 0;
+      RetainCallee = 0;
+      AutoreleaseCallee = 0;
+    }
+  }
+
+  return false;
+}
+
+/// runOnFunction - Run the ARC optimizations on F and report whether any of
+/// them changed it. Each group of transformations is skipped when the calls
+/// it works on do not occur in F.
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  // Nothing to do when the optimizations are switched off or when nothing in
+  // the Module uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  // Bit masks over UsedInThisFunction, one bit per instruction class.
+  const int WeakClasses = (1 << IC_LoadWeak) |
+                          (1 << IC_LoadWeakRetained) |
+                          (1 << IC_StoreWeak) |
+                          (1 << IC_InitWeak) |
+                          (1 << IC_CopyWeak) |
+                          (1 << IC_MoveWeak) |
+                          (1 << IC_DestroyWeak);
+  const int RetainClasses = (1 << IC_Retain) |
+                            (1 << IC_RetainRV) |
+                            (1 << IC_RetainBlock);
+  const int AutoreleaseClasses = (1 << IC_Autorelease) |
+                                 (1 << IC_AutoreleaseRV);
+
+  Changed = false;
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  if (UsedInThisFunction & WeakClasses)
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs; these need both a retain-like
+  // call and a release in the function.
+  if ((UsedInThisFunction & RetainClasses) &&
+      (UsedInThisFunction & (1 << IC_Release))) {
+    // Run OptimizeSequences until it either stops making changes or
+    // no retain+release pair nesting is detected.
+    while (OptimizeSequences(F)) {}
+  }
+
+  // Optimizations if objc_autorelease is used.
+  if (UsedInThisFunction & AutoreleaseClasses)
+    OptimizeReturns(F);
+
+  return Changed;
+}
+
+/// releaseMemory - Clear the ProvenanceAnalysis member PA.
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+// Incremented by ObjCARCContract::ContractRelease each time it forms an
+// objc_storeStrong call.
+STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+
+namespace {
+ /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+ /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ /// Changed - Set by the contraction helpers whenever they rewrite the
+ /// function; reset at the start of runOnFunction.
+ bool Changed;
+ /// AA, DT - Analyses fetched in runOnFunction for the current function.
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ /// PA - Provenance analysis, given the AliasAnalysis result in
+ /// runOnFunction and passed to the FindDependencies queries.
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ /// Computed in doInitialization from ModuleHasARC.
+ bool Run;
+
+ /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *StoreStrongCallee,
+ *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
+
+ /// RetainRVMarker - The inline asm string to insert between calls and
+ /// RetainRV calls to make the optimization work on targets which need it.
+ /// Null when the module carries no such marker metadata.
+ const MDString *RetainRVMarker;
+
+ /// Accessors that create the corresponding declaration in M on first use
+ /// and return the cached value afterwards.
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ /// ContractAutorelease - Try to fuse Autorelease with a matching
+ /// objc_retain; returns true if the autorelease was erased.
+ /// DependingInstructions and Visited are caller-provided scratch sets.
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ /// ContractRelease - Try to turn Release plus a load, store and retain in
+ /// the same block into one objc_storeStrong call. Iter is the caller's
+ /// instruction iterator, advanced when the instruction it refers to is
+ /// about to be erased.
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+// Pass identification and registration under the command-line name
+// "objc-arc-contract", declaring AliasAnalysis and DominatorTree as
+// dependencies.
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+/// createObjCARCContractPass - Return a newly allocated ObjCARCContract pass.
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+/// getAnalysisUsage - This pass requires AliasAnalysis and DominatorTree and
+/// declares that it preserves the CFG.
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+/// getStoreStrongCallee - Return the declaration of
+///   void objc_storeStrong(i8**, i8*)
+/// in M, creating it on first use and caching it in StoreStrongCallee.
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+  if (!StoreStrongCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *I8XX = PointerType::getUnqual(I8X);
+    std::vector<Type *> Params;
+    Params.push_back(I8XX);
+    Params.push_back(I8X);
+
+    // Attribute lists are immutable: addAttr returns the extended list rather
+    // than modifying the receiver, so each result must be assigned back.
+    // Index ~0u addresses the function itself, index 1 its first parameter.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Attributes = Attributes.addAttr(1, Attribute::NoCapture);
+
+    StoreStrongCallee =
+      M->getOrInsertFunction(
+        "objc_storeStrong",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return StoreStrongCallee;
+}
+
+/// getRetainAutoreleaseCallee - Return the declaration of
+///   i8* objc_retainAutorelease(i8*)
+/// in M, creating it on first use and caching it in RetainAutoreleaseCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+  if (!RetainAutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list rather
+    // than modifying the receiver, so the result must be assigned back.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseCallee =
+      M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+  }
+  return RetainAutoreleaseCallee;
+}
+
+/// getRetainAutoreleaseRVCallee - Return the declaration of
+///   i8* objc_retainAutoreleaseReturnValue(i8*)
+/// in M, creating it on first use and caching it in
+/// RetainAutoreleaseRVCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+  if (!RetainAutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list rather
+    // than modifying the receiver, so the result must be assigned back.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+                             Attributes);
+  }
+  return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Try to merge Autorelease with the objc_retain it
+/// depends on into a single fused runtime call. On success the retain call is
+/// redirected to the fused function, the autorelease is erased, and true is
+/// returned. Both scratch sets are left empty on every path.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+                                     InstructionClass Class,
+                                     SmallPtrSet<Instruction *, 4>
+                                       &DependingInstructions,
+                                     SmallPtrSet<const BasicBlock *, 4>
+                                       &Visited) {
+  const bool IsRV = Class == IC_AutoreleaseRV;
+  const Value *Arg = GetObjCArg(Autorelease);
+
+  // Look for what the autorelease depends on; anything between the retain and
+  // the autorelease which may change the count (such as an autorelease_pop)
+  // shows up here as well and defeats the match.
+  if (IsRV)
+    FindDependencies(RetainAutoreleaseRVDep, Arg,
+                     Autorelease->getParent(), Autorelease,
+                     DependingInstructions, Visited, PA);
+  else
+    FindDependencies(RetainAutoreleaseDep, Arg,
+                     Autorelease->getParent(), Autorelease,
+                     DependingInstructions, Visited, PA);
+  Visited.clear();
+
+  // Exactly one dependency is required, and it has to be a call.
+  CallInst *Retain = 0;
+  if (DependingInstructions.size() == 1)
+    Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+  DependingInstructions.clear();
+
+  if (!Retain)
+    return false;
+  if (GetBasicInstructionClass(Retain) != IC_Retain)
+    return false;
+  if (GetObjCArg(Retain) != Arg)
+    return false;
+
+  Changed = true;
+  ++NumPeeps;
+
+  // Redirect the retain to the fused entry point matching the autorelease
+  // flavor, then drop the now redundant autorelease.
+  Module *M = F.getParent();
+  if (IsRV)
+    Retain->setCalledFunction(getRetainAutoreleaseRVCallee(M));
+  else
+    Retain->setCalledFunction(getRetainAutoreleaseCallee(M));
+
+  EraseInstruction(Autorelease);
+  return true;
+}
+
+/// ContractRelease - Attempt to merge an objc_release with a store, load, and
+/// objc_retain to form an objc_storeStrong. This can be a little tricky because
+/// the instructions don't always appear in order, and there may be unrelated
+/// intervening instructions.
+///
+/// Release is the objc_release call to start from. Iter is the caller's
+/// instruction iterator; it is advanced past every instruction that is about
+/// to be erased so that it stays valid.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+                                      inst_iterator &Iter) {
+  LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+  if (!Load || Load->isVolatile()) return;
+
+  // For now, require everything to be in one basic block.
+  BasicBlock *BB = Release->getParent();
+  if (Load->getParent() != BB) return;
+
+  // Walk down to find the store. The release itself, retains, and anything
+  // that does not modify the loaded location are stepped over.
+  BasicBlock::iterator I = Load, End = BB->end();
+  ++I;
+  AliasAnalysis::Location Loc = AA->getLocation(Load);
+  while (I != End &&
+         (&*I == Release ||
+          IsRetain(GetBasicInstructionClass(I)) ||
+          !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
+    ++I;
+  StoreInst *Store = dyn_cast<StoreInst>(I);
+  if (!Store || Store->isVolatile()) return;
+  if (Store->getPointerOperand() != Loc.Ptr) return;
+
+  Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+  // Walk up to find the retain.
+  I = Store;
+  BasicBlock::iterator Begin = BB->begin();
+  while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+    --I;
+  Instruction *Retain = I;
+  if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+  if (GetObjCArg(Retain) != New) return;
+
+  Changed = true;
+  ++NumStoreStrongs;
+
+  LLVMContext &C = Release->getContext();
+  const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+  const Type *I8XX = PointerType::getUnqual(I8X);
+
+  // Build the objc_storeStrong(location, value) call in front of the store,
+  // casting the operands to i8** / i8* where needed.
+  Value *Args[] = { Load->getPointerOperand(), New };
+  if (Args[0]->getType() != I8XX)
+    Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+  if (Args[1]->getType() != I8X)
+    Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+  CallInst *StoreStrong =
+    CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+                     Args, "", Store);
+  StoreStrong->setDoesNotThrow();
+  StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+  // Keep the caller's iterator off anything erased below. Both the store and
+  // the retain can sit right after the release (e.g. load, release, retain,
+  // store), so step past either of them, in whatever order they appear. The
+  // block's terminator is neither, which bounds the loop.
+  while (&*Iter == Store || &*Iter == Retain)
+    ++Iter;
+  Store->eraseFromParent();
+  Release->eraseFromParent();
+  EraseInstruction(Retain);
+  if (Load->use_empty())
+    Load->eraseFromParent();
+}
+
+/// doInitialization - Per-module setup: record in Run whether M uses ARC
+/// and, if it does, reset the lazily created callees and pick up the
+/// retainAutoreleasedReturnValue marker string from the module's named
+/// metadata. Always returns false.
+bool ObjCARCContract::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  if (Run) {
+    // These are initialized lazily.
+    StoreStrongCallee = 0;
+    RetainAutoreleaseCallee = 0;
+    RetainAutoreleaseRVCallee = 0;
+
+    // The marker is only accepted from a named metadata node with exactly one
+    // operand whose single operand is an MDString; otherwise it stays null.
+    RetainRVMarker = 0;
+    NamedMDNode *MarkerMD =
+      M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
+    if (MarkerMD && MarkerMD->getNumOperands() == 1) {
+      const MDNode *Node = MarkerMD->getOperand(0);
+      if (Node->getNumOperands() == 1)
+        RetainRVMarker = dyn_cast<MDString>(Node->getOperand(0));
+    }
+  }
+
+  return false;
+}
+
+/// runOnFunction - Apply the late ARC "contraction" rewrites to F:
+///  - autorelease calls are handed to ContractAutorelease,
+///  - release calls are handed to ContractRelease,
+///  - objc_initWeak(p, null) becomes a plain store of null,
+///  - the retainAutoreleasedReturnValue inline-asm marker is inserted when
+///    the module requested one and the call producing the value immediately
+///    precedes the retainRV call (ignoring no-op instructions),
+///  - uses of the argument of calls that return their argument are rewritten
+///    to use the call's result where the call dominates the use.
+/// Returns true if the function was modified.
+bool ObjCARCContract::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  Changed = false;
+  AA = &getAnalysis<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // For ObjC library calls which return their argument, replace uses of the
+  // argument with uses of the call return value, if it dominates the use. This
+  // reduces register pressure.
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    // Advance before processing: the cases below may erase Inst.
+    Instruction *Inst = &*I++;
+
+    // Only these library routines return their argument. In particular,
+    // objc_retainBlock does not necessarily return its argument.
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    switch (Class) {
+    case IC_Retain:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      break;
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+      if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+        continue;
+      break;
+    case IC_RetainRV: {
+      // If we're compiling for a target which needs a special inline-asm
+      // marker to do the retainAutoreleasedReturnValue optimization,
+      // insert it now.
+      if (!RetainRVMarker)
+        break;
+      // Step up to the nearest preceding instruction that is not a no-op.
+      // Never step before the first instruction of the block: the retainRV
+      // call may itself be first, or be preceded only by no-ops.
+      BasicBlock::iterator BBI = Inst;
+      BasicBlock::iterator BlockBegin = Inst->getParent()->begin();
+      bool FoundPrev = false;
+      while (BBI != BlockBegin) {
+        --BBI;
+        if (!isNoopInstruction(BBI)) {
+          FoundPrev = true;
+          break;
+        }
+      }
+      if (FoundPrev && &*BBI == GetObjCArg(Inst)) {
+        // Inserting the marker call modifies the function.
+        Changed = true;
+        InlineAsm *IA =
+          InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+                                           /*isVarArg=*/false),
+                         RetainRVMarker->getString(),
+                         /*Constraints=*/"", /*hasSideEffects=*/true);
+        CallInst::Create(IA, "", Inst);
+      }
+      break;
+    }
+    case IC_InitWeak: {
+      // objc_initWeak(p, null) => *p = null
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(1))) {
+        Value *Null =
+          ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+        Changed = true;
+        new StoreInst(Null, CI->getArgOperand(0), CI);
+        CI->replaceAllUsesWith(Null);
+        CI->eraseFromParent();
+      }
+      continue;
+    }
+    case IC_Release:
+      ContractRelease(Inst, I);
+      continue;
+    default:
+      continue;
+    }
+
+    // Don't use GetObjCArg because we don't want to look through bitcasts
+    // and such; to do the replacement, the argument must have type i8*.
+    const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+    for (;;) {
+      // If we're compiling bugpointed code, don't get in trouble.
+      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+        break;
+      // Look through the uses of the pointer.
+      for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+           UI != UE; ) {
+        Use &U = UI.getUse();
+        unsigned OperandNo = UI.getOperandNo();
+        ++UI; // Increment UI now, because we may unlink its element.
+        if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
+          if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
+            Changed = true;
+            Instruction *Replacement = Inst;
+            const Type *UseTy = U.get()->getType();
+            if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+              // For PHI nodes, insert the bitcast in the predecessor block.
+              unsigned ValNo =
+                PHINode::getIncomingValueNumForOperand(OperandNo);
+              BasicBlock *BB =
+                PHI->getIncomingBlock(ValNo);
+              if (Replacement->getType() != UseTy)
+                Replacement = new BitCastInst(Replacement, UseTy, "",
+                                              &BB->back());
+              // Update every incoming entry for BB, not just this use, so
+              // all entries for the same predecessor stay identical.
+              for (unsigned i = 0, e = PHI->getNumIncomingValues();
+                   i != e; ++i)
+                if (PHI->getIncomingBlock(i) == BB) {
+                  // Keep the UI iterator valid.
+                  if (&PHI->getOperandUse(
+                          PHINode::getOperandNumForIncomingValue(i)) ==
+                        &UI.getUse())
+                    ++UI;
+                  PHI->setIncomingValue(i, Replacement);
+                }
+            } else {
+              if (Replacement->getType() != UseTy)
+                Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+              U.set(Replacement);
+            }
+          }
+      }
+
+      // If Arg is a no-op casted pointer, strip one level of casts and
+      // iterate.
+      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+        Arg = BI->getOperand(0);
+      else if (isa<GEPOperator>(Arg) &&
+               cast<GEPOperator>(Arg)->hasAllZeroIndices())
+        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+      else if (isa<GlobalAlias>(Arg) &&
+               !cast<GlobalAlias>(Arg)->mayBeOverridden())
+        Arg = cast<GlobalAlias>(Arg)->getAliasee();
+      else
+        break;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c1dfe15..e6341ae 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
- if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+ if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
assert(!Duplicates.count(Factor) &&
"Shouldn't have two constant factors, missed a canonicalize");
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 32a0506..302c287 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,7 +48,12 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopUnswitchPass(Registry);
initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
+ initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8938b28..7d6349c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -30,6 +30,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
@@ -152,7 +153,8 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+ AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
// SROA_DT - SROA that uses DominatorTree.
@@ -228,16 +230,30 @@ class ConvertToScalarInfo {
/// which means that mem2reg can't promote it.
bool IsNotTrivial;
+ /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+ /// computed based on the uses of the alloca rather than the LLVM type system.
+ enum {
+ Unknown,
+
+ // Accesses via GEPs that are consistent with element access of a vector
+ // type. This will not be converted into a vector unless there is a later
+ // access using an actual vector type.
+ ImplicitVector,
+
+ // Accesses via vector operations and GEPs that are consistent with the
+ // layout of a vector type.
+ Vector,
+
+ // An integer bag-of-bits with bitwise operations for insertion and
+ // extraction. Any combination of types can be converted into this kind
+ // of scalar.
+ Integer
+ } ScalarKind;
+
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
- const Type *VectorTy;
-
- /// HadAVector - True if there is at least one vector access to the alloca.
- /// We don't want to turn random arrays into vectors and use vector element
- /// insert/extract, but if there are element accesses to something that is
- /// also declared as a vector, we do want to promote to a vector.
- bool HadAVector;
+ const VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
@@ -246,14 +262,14 @@ class ConvertToScalarInfo {
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
- HadAVector(false), HadNonMemTransferAccess(false) { }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
+ VectorTy(0), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
@@ -274,6 +290,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
return 0;
+ // If an alloca has only memset / memcpy uses, it may still have an Unknown
+ // ScalarKind. Treat it as an Integer below.
+ if (ScalarKind == Unknown)
+ ScalarKind = Integer;
+
+ // FIXME: It should be possible to promote the vector type up to the alloca's
+ // size.
+ if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+ ScalarKind = Integer;
+
// If we were able to find a vector type that can handle this with
// insert/extract elements, and if there was at least one use that had
// a vector type, promote this to a vector. We don't want to promote
@@ -281,14 +307,15 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (ScalarKind == Vector) {
+ assert(VectorTy && "Missing type for vector scalar.");
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
unsigned BitWidth = AllocaSize * 8;
- if (!HadAVector && !HadNonMemTransferAccess &&
- !TD.fitsInLegalInteger(BitWidth))
+ if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+ !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
return 0;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
@@ -300,8 +327,9 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
return NewAI;
}
-/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
-/// so far at the offset specified by Offset (which is specified in bytes).
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
///
/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
@@ -316,11 +344,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
- bool IsLoadOrStore) {
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+ uint64_t Offset) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
- if (VectorTy && VectorTy->isVoidTy())
+ if (ScalarKind == Integer)
return;
// If this could be contributing to a vector, analyze it.
@@ -336,7 +364,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Full width accesses can be ignored, because they can always be turned
// into bitcasts.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (IsLoadOrStore && EltSize == AllocaSize)
+ if (EltSize == AllocaSize)
return;
// If we're accessing something that could be an element of a vector, see
@@ -345,11 +373,12 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
(!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
if (!VectorTy) {
+ ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
return;
}
- unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ unsigned CurrentEltSize = VectorTy->getElementType()
->getPrimitiveSizeInBits()/8;
if (EltSize == CurrentEltSize)
return;
@@ -361,16 +390,13 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
- VectorTy = Type::getVoidTy(In->getContext());
+ ScalarKind = Integer;
}
-/// MergeInVectorType - Handles the vector case of MergeInType, returning true
-/// if the type was successfully merged and false otherwise.
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
uint64_t Offset) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
// TODO: Support nonzero offsets?
if (Offset != 0)
return false;
@@ -382,19 +408,22 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
// If this is the first vector we see, remember the type so that we know the
// element size.
if (!VectorTy) {
+ ScalarKind = Vector;
VectorTy = VInTy;
return true;
}
- unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned BitWidth = VectorTy->getBitWidth();
unsigned InBitWidth = VInTy->getBitWidth();
// Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+ ScalarKind = Vector;
return true;
+ }
- const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
- const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+ const Type *ElementTy = VectorTy->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
// Do not allow mixed integer and floating-point accesses from vectors of
// different sizes.
@@ -429,6 +458,7 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
}
// Pick the largest of the two vector types.
+ ScalarKind = Vector;
if (InBitWidth > BitWidth)
VectorTy = VInTy;
@@ -456,7 +486,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LI->getType()->isX86_MMXTy())
return false;
HadNonMemTransferAccess = true;
- MergeInType(LI->getType(), Offset, true);
+ MergeInTypeForLoadOrStore(LI->getType(), Offset);
continue;
}
@@ -467,7 +497,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
HadNonMemTransferAccess = true;
- MergeInType(SI->getOperand(0)->getType(), Offset, true);
+ MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
continue;
}
@@ -498,10 +528,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
- // Store of constant value and constant size.
- if (!isa<ConstantInt>(MSI->getValue()) ||
- !isa<ConstantInt>(MSI->getLength()))
+ // Store of constant value.
+ if (!isa<ConstantInt>(MSI->getValue()))
+ return false;
+
+ // Store of constant size.
+ ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+ if (!Len)
return false;
+
+ // If the size differs from the alloca, we can only convert the alloca to
+ // an integer bag-of-bits.
+ // FIXME: This should handle all of the cases that are currently accepted
+ // as vector element insertions.
+ if (Len->getZExtValue() != AllocaSize || Offset != 0)
+ ScalarKind = Integer;
+
IsNotTrivial = true; // Can't be mem2reg'd.
HadNonMemTransferAccess = true;
continue;
@@ -1053,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) {
namespace {
class AllocaPromoter : public LoadAndStorePromoter {
AllocaInst *AI;
+ DIBuilder *DIB;
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
- DbgDeclareInst *DD, DIBuilder *&DB)
- : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {}
+ DIBuilder *DB)
+ : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) // Scan users of the MDNode looked up for AI; skipped when the lookup yields null.
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI); // Remember dbg.declare users; updateDebugInfo reads this list.
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI); // Remember dbg.value users; updateDebugInfo reads this list.
+
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) { // The alloca is gone; drop the dbg.declare intrinsics collected above.
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) { // Likewise drop the collected dbg.value intrinsics.
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
}
virtual bool isInstInList(Instruction *I,
@@ -1071,6 +1134,45 @@ public:
return LI->getOperand(0) == AI;
return cast<StoreInst>(I)->getPointerOperand() == AI;
}
+
+  /// updateDebugInfo - Emit debug value information for a promoted load or
+  /// store. Each collected dbg.declare is converted via
+  /// ConvertDebugDeclareToDebugValue, and for each collected dbg.value a new
+  /// dbg.value is inserted at the instruction. Instructions that are neither
+  /// loads nor stores are ignored.
+  virtual void updateDebugInfo(Instruction *Inst) const {
+    // Classify the instruction once rather than once per debug intrinsic.
+    StoreInst *Store = dyn_cast<StoreInst>(Inst);
+    LoadInst *Load = Store ? 0 : dyn_cast<LoadInst>(Inst);
+    if (!Store && !Load)
+      return;
+
+    typedef SmallVector<DbgDeclareInst *, 4>::const_iterator DeclareIter;
+    for (DeclareIter DI = DDIs.begin(), DE = DDIs.end(); DI != DE; ++DI) {
+      if (Store)
+        ConvertDebugDeclareToDebugValue(*DI, Store, *DIB);
+      else
+        ConvertDebugDeclareToDebugValue(*DI, Load, *DIB);
+    }
+
+    typedef SmallVector<DbgValueInst *, 4>::const_iterator ValueIter;
+    for (ValueIter VI = DVIs.begin(), VE = DVIs.end(); VI != VE; ++VI) {
+      DbgValueInst *DVI = *VI;
+      Instruction *DbgVal;
+      if (Store) {
+        // If an argument is zero or sign extended then describe the argument
+        // directly. The extension may be zapped by an optimization pass in
+        // future.
+        Value *Described = Store->getOperand(0);
+        if (isa<ZExtInst>(Described) || isa<SExtInst>(Described))
+          if (Argument *ExtendedArg =
+                dyn_cast<Argument>(cast<Instruction>(Described)->getOperand(0)))
+            Described = ExtendedArg;
+        DbgVal = DIB->insertDbgValueIntrinsic(Described, 0,
+                                              DIVariable(DVI->getVariable()),
+                                              Store);
+      } else {
+        DbgVal = DIB->insertDbgValueIntrinsic(Load->getOperand(0), 0,
+                                              DIVariable(DVI->getVariable()),
+                                              Load);
+      }
+      DbgVal->setDebugLoc(DVI->getDebugLoc());
+    }
+  }
};
} // end anon namespace
@@ -1262,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
LoadInst *FalseLoad =
- Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
// Transfer alignment and TBAA info if present.
TrueLoad->setAlignment(LI->getAlignment());
@@ -1340,10 +1442,9 @@ bool SROA::performPromotion(Function &F) {
DT = &getAnalysis<DominatorTree>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
-
+ DIBuilder DIB(*F.getParent());
bool Changed = false;
SmallVector<Instruction*, 64> Insts;
- DIBuilder *DIB = 0;
while (1) {
Allocas.clear();
@@ -1367,11 +1468,7 @@ bool SROA::performPromotion(Function &F) {
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
Insts.push_back(cast<Instruction>(*UI));
-
- DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
- if (DDI && !DIB)
- DIB = new DIBuilder(*AI->getParent()->getParent()->getParent());
- AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts);
+ AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
Insts.clear();
}
}
@@ -1379,10 +1476,6 @@ bool SROA::performPromotion(Function &F) {
Changed = true;
}
- // FIXME: Is there a better way to handle the lazy initialization of DIB
- // so that there doesn't need to be an explicit delete?
- delete DIB;
-
return Changed;
}
@@ -1403,8 +1496,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the malloc/alloca instructions in the function, removing
-// them if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1438,12 +1531,15 @@ bool SROA::performScalarRepl(Function &F) {
// the constant global instead. This is commonly produced by the CFE by
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
- if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
- Constant *TheSrc = cast<Constant>(TheCopy->getSource());
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- TheCopy->eraseFromParent(); // Don't mutate the global.
+ Copy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
++NumGlobals;
Changed = true;
@@ -2467,8 +2563,14 @@ static bool PointsToConstantGlobal(Value *V) {
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
-static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset) {
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ bool isOffset,
+ SmallVector<Instruction *, 4> &LifetimeMarkers) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
User *U = cast<Instruction>(*UI);
@@ -2480,7 +2582,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+ LifetimeMarkers))
return false;
continue;
}
@@ -2488,7 +2591,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices()))
+ isOffset || !GEP->hasAllZeroIndices(),
+ LifetimeMarkers))
return false;
continue;
}
@@ -2514,6 +2618,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ LifetimeMarkers.push_back(II);
+ continue;
+ }
+ }
+
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2550,9 +2664,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVector<Instruction*, 4> &ToDelete) {
MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
return TheCopy;
return 0;
}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc80..a66b3e3 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
static void ChangeToCall(InvokeInst *II) {
BasicBlock *BB = II->getParent();
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
- Args.end(), "", II);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b03..7c415e5 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- const Type *ArgType = Op->getType();
+ Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, &ArgType, 1);
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
OpenPOWER on IntegriCloud