Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp                      2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp          108
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp    182
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp                  8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp                     274
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp         1142
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp             4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp                     64
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp       10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp      215
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp           61
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp             28
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp             173
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp          14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp                 440
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp               2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp                    250
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp                    5
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp    598
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp        111
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp                     13
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp         373
22 files changed, 2131 insertions, 1946 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index a5adb5e..ba214d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -57,6 +57,7 @@ bool ADCE::runOnFunction(Function& F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isa<TerminatorInst>(I.getInstructionIterator()) ||
isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ isa<LandingPadInst>(I.getInstructionIterator()) ||
I->mayHaveSideEffects()) {
alive.insert(I.getInstructionIterator());
worklist.push_back(I.getInstructionIterator());
@@ -65,7 +66,6 @@ bool ADCE::runOnFunction(Function& F) {
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
Instruction* curr = worklist.pop_back_val();
-
for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
OI != OE; ++OI)
if (Instruction* Inst = dyn_cast<Instruction>(OI))
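
The hunk above extends the set of a-priori-live instructions that seed ADCE's worklist (terminators, debug intrinsics, and now landingpads, which must survive for exception handling); liveness is then propagated backwards over def-use edges. A minimal sketch of that mark-and-sweep shape, using stand-in types rather than the pass's real ones:

    // Illustrative only: Inst and RootLive stand in for LLVM's types.
    #include <set>
    #include <vector>

    struct Inst {
      std::vector<Inst *> Operands; // instructions defining our operands
      bool RootLive;                // terminator/landingpad/side effects
    };

    std::set<Inst *> markLive(const std::vector<Inst *> &AllInsts) {
      std::set<Inst *> Alive;
      std::vector<Inst *> Worklist;
      // Seed with instructions that are live no matter what.
      for (size_t i = 0; i != AllInsts.size(); ++i)
        if (AllInsts[i]->RootLive) {
          Alive.insert(AllInsts[i]);
          Worklist.push_back(AllInsts[i]);
        }
      // Propagate liveness backwards to operands.
      while (!Worklist.empty()) {
        Inst *Curr = Worklist.back();
        Worklist.pop_back();
        for (size_t i = 0; i != Curr->Operands.size(); ++i)
          if (Alive.insert(Curr->Operands[i]).second)
            Worklist.push_back(Curr->Operands[i]);
      }
      return Alive; // anything not in this set is removable
    }
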
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0af14ed..f8f18b2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -58,6 +58,7 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -104,12 +105,13 @@ namespace {
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
bool OptimizeInst(Instruction *I);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool PlaceDbgValues(Function &F);
};
}
@@ -132,6 +134,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ // If llvm.dbg.value is far away from the value, then iSel may not be
+ // able to handle it properly. iSel will drop llvm.dbg.value if it
+ // cannot find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F);
+
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
@@ -410,8 +417,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
@@ -467,8 +473,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCmp =
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
@@ -528,7 +533,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- const Type *ReturnTy = CI->getType();
+ Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
// Substituting this can cause recursive simplifications, which can
@@ -551,22 +556,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
- if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
- if (!VI->isTerminator() &&
- (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstNonPHI());
- else
- DVI->insertAfter(VI);
- return true;
- }
-
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
@@ -724,7 +713,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy) {
+ Type *AccessTy) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
@@ -746,13 +735,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
worklist.pop_back();
// Break use-def graph loops.
- if (Visited.count(V)) {
+ if (!Visited.insert(V)) {
Consensus = 0;
break;
}
- Visited.insert(V);
-
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
@@ -763,7 +750,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
NewAddrModeInsts, *TLI);
// This check is broken into two cases with very similar code to avoid using
@@ -822,7 +809,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr is
// guaranteed to happen later.
- BasicBlock::iterator InsertPt = MemoryInst;
+ IRBuilder<> Builder(MemoryInst);
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
@@ -833,11 +820,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- const Type *IntPtrTy =
+ Type *IntPtrTy =
TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
@@ -850,10 +837,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType()->isPointerTy())
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
if (V->getType() != IntPtrTy)
- V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
- "sunkaddr", InsertPt);
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
Result = V;
}
@@ -863,29 +849,27 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (V->getType() == IntPtrTy) {
// done.
} else if (V->getType()->isPointerTy()) {
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
- V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
- AddrMode.Scale),
- "sunkaddr", InsertPt);
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
- Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
- InsertPt);
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -894,7 +878,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -902,7 +886,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
- SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
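
The rewrite above swaps manual instruction construction (`new PtrToIntInst(..., InsertPt)`) for an IRBuilder anchored at the memory instruction; the builder both inserts before that point and constant-folds where it can. A hedged sketch of the resulting pattern (the helper name and parameters are invented for illustration):

    // Materialize Base + Scale*Index + Offs as a pointer just before
    // MemoryInst, in the style of the hunk above.
    #include "llvm/Support/IRBuilder.h" // header path as of this LLVM vintage
    using namespace llvm;

    static Value *emitSunkAddr(Instruction *MemoryInst, Value *Base,
                               Value *Index, int64_t Scale, int64_t Offs,
                               Type *IntPtrTy, Type *PtrTy) {
      IRBuilder<> Builder(MemoryInst); // new instructions go before MemoryInst
      Value *Result = Builder.CreatePtrToInt(Base, IntPtrTy, "sunkaddr");
      if (Scale != 1)
        Index = Builder.CreateMul(Index, ConstantInt::get(IntPtrTy, Scale),
                                  "sunkaddr");
      Result = Builder.CreateAdd(Result, Index, "sunkaddr");
      if (Offs)
        Result = Builder.CreateAdd(Result, ConstantInt::get(IntPtrTy, Offs),
                                   "sunkaddr");
      return Builder.CreateIntToPtr(Result, PtrTy, "sunkaddr");
    }
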
@@ -1059,8 +1043,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
@@ -1159,3 +1142,34 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
return MadeChange;
}
+
+// If llvm.dbg.value is far away from the value, then iSel may not be
+// able to handle it properly. iSel will drop llvm.dbg.value if it
+// cannot find a node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ Instruction *PrevNonDbgInst = NULL;
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ Instruction *Insn = BI; ++BI;
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ if (!DVI) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
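
PlaceDbgValues makes one linear scan per block, remembering the previous non-debug instruction, and re-inserts any llvm.dbg.value that has drifted away from the instruction defining its value. The same shape with stand-in types (a sketch, not the pass's code; it assumes, as SSA guarantees within a block, that a definition appears before its debug marker):

    #include <algorithm>
    #include <list>

    struct Ins {
      bool IsDbg;
      Ins *Def; // for debug markers: the defining instruction, else null
    };

    void placeDbgMarkers(std::list<Ins *> &BB) {
      Ins *PrevNonDbg = 0;
      for (std::list<Ins *>::iterator I = BB.begin(); I != BB.end();) {
        Ins *Cur = *I++;
        if (!Cur->IsDbg) {
          PrevNonDbg = Cur;
          continue;
        }
        if (!Cur->Def || Cur->Def == PrevNonDbg)
          continue; // already adjacent to its definition
        // Move the marker to sit right after its definition.
        BB.remove(Cur);
        std::list<Ins *>::iterator Pos =
            std::find(BB.begin(), BB.end(), Cur->Def);
        BB.insert(++Pos, Cur);
      }
    }
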
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cb9b5be..a593d0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,18 +52,18 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
-
+
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
-
+
AA = 0; MD = 0;
return Changed;
}
-
+
bool runOnBasicBlock(BasicBlock &BB);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
@@ -105,34 +105,34 @@ static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
SmallPtrSet<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
-
+
NowDeadInsts.push_back(I);
--NumFastOther;
-
+
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
-
+
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
-
+
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, 0);
-
+
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
-
+
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI))
NowDeadInsts.push_back(OpI);
}
-
+
DeadInst->eraseFromParent();
-
+
if (ValueSet) ValueSet->erase(DeadInst);
} while (!NowDeadInsts.empty());
}
@@ -159,11 +159,13 @@ static bool hasMemoryWrite(Instruction *I) {
}
/// getLocForWrite - Return a Location stored to by the specified instruction.
+/// If isRemovable returns true, this function and getLocForRead completely
+/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
-
+
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
@@ -174,10 +176,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
return AliasAnalysis::Location();
return Loc;
}
-
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
if (II == 0) return AliasAnalysis::Location();
-
+
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
@@ -185,7 +187,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
if (AA.getTargetData() == 0) return AliasAnalysis::Location();
-
+
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -198,10 +200,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static AliasAnalysis::Location
+static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
assert(hasMemoryWrite(Inst) && "Unknown instruction case");
-
+
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
@@ -213,10 +215,10 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
/// isRemovable - If the value of this instruction and the memory it writes to
/// is unused, may we delete this instruction?
static bool isRemovable(Instruction *I) {
- // Don't remove volatile stores.
+ // Don't remove volatile/atomic stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return !SI->isVolatile();
-
+ return SI->isUnordered();
+
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
@@ -227,7 +229,7 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::init_trampoline:
// Always safe to remove init_trampoline.
return true;
-
+
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
@@ -255,16 +257,16 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
const TargetData *TD = AA.getTargetData();
if (TD == 0)
return AliasAnalysis::UnknownSize;
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
return AliasAnalysis::UnknownSize;
}
-
+
assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- const PointerType *PT = cast<PointerType>(V->getType());
+ PointerType *PT = cast<PointerType>(V->getType());
return TD->getTypeAllocSize(PT->getElementType());
}
@@ -287,7 +289,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
AliasAnalysis &AA) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
-
+
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (P1 == P2) {
@@ -302,33 +304,33 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
return Later.Ptr->getType() == Earlier.Ptr->getType();
return false;
}
-
+
// Make sure that the Later size is >= the Earlier size.
if (Later.Size < Earlier.Size)
return false;
return true;
}
-
+
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
Later.Size <= Earlier.Size || AA.getTargetData() == 0)
return false;
-
+
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
const TargetData &TD = *AA.getTargetData();
-
+
const Value *UO1 = GetUnderlyingObject(P1, &TD),
*UO2 = GetUnderlyingObject(P2, &TD);
-
+
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
return false;
-
+
// If the "Later" store is to a recognizable object, get its size.
if (isObjectPointerWithTrustworthySize(UO2)) {
uint64_t ObjectSize =
@@ -336,26 +338,26 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
if (ObjectSize == Later.Size)
return true;
}
-
+
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
int64_t EarlierOff = 0, LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
-
+
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return false;
// The later store completely overlaps the earlier store if:
- //
+ //
// 1. Both start at the same offset and the later one's size is greater than
// or equal to the earlier one's, or
//
// |--earlier--|
// |-- later --|
- //
+ //
// 2. The earlier store has an offset greater than the later offset, but which
// still lies completely within the later store.
//
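
Once both pointers decompose to the same base, the two overlap cases above collapse into a single bounds check. A sketch of that predicate (offsets and sizes in bytes; not the pass's exact code):

    #include <cstdint>

    // True when [LaterOff, LaterOff+LaterSize) fully covers
    // [EarlierOff, EarlierOff+EarlierSize): equal starts with a larger or
    // equal later store (case 1), or an earlier store nested inside (case 2).
    static bool laterCoversEarlier(int64_t EarlierOff, uint64_t EarlierSize,
                                   int64_t LaterOff, uint64_t LaterSize) {
      return LaterOff <= EarlierOff &&
             uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
    }
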
@@ -373,7 +375,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
-/// input dead in the traditional sense. Consider this case:
+/// input dead in the traditional sense. Consider this case:
///
/// memcpy(A <- B)
/// memcpy(A <- A)
@@ -391,10 +393,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
-
+
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
-
+
// Okay, 'Inst' may copy over itself. However, we can still remove the
// DepWrite instruction if we can prove that it reads from the same location
// as Inst. This handles useful cases like:
@@ -404,10 +406,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one).
AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
-
+
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
-
+
// If DepWrite doesn't read memory or if we can't prove it is a must alias,
// then it can't be considered dead.
return true;
@@ -420,43 +422,43 @@ static bool isPossibleSelfRead(Instruction *Inst,
bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
-
+
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst)) {
MadeChange |= HandleFree(F);
continue;
}
-
+
// If we find something that writes memory, get its memory dependence.
if (!hasMemoryWrite(Inst))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
-
+
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() || InstDep.isUnknown())
+ if (!InstDep.isDef() && !InstDep.isClobber())
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ SI->getOperand(0) == DepLoad && isRemovable(SI)) {
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
-
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
-
+
DeleteDeadInstruction(SI, *MD);
-
+
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
@@ -467,15 +469,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
}
-
+
// Figure out what location is being stored to.
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
if (Loc.Ptr == 0)
continue;
-
- while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
+
+ while (InstDep.isDef() || InstDep.isClobber()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -496,12 +498,12 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
-
+
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
-
+
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = Inst;
@@ -509,7 +511,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
break;
}
-
+
// If this is a may-aliased store that is clobbering the store value, we
// can keep searching past it for another must-aliased pointer that stores
// to the same location. For example, in:
@@ -519,20 +521,20 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// we can remove the first store to P even though we don't know if P and Q
// alias.
if (DepWrite == &BB.front()) break;
-
+
// Can't look past this instruction if it might read 'Loc'.
if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
break;
-
+
InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
-
+
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
MadeChange |= handleEndBlock(BB);
-
+
return MadeChange;
}
@@ -543,18 +545,18 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getDependency(F);
- while (!Dep.isNonLocal() && !Dep.isUnknown()) {
+ while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
return MadeChange;
-
+
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
return MadeChange;
-
+
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
@@ -567,7 +569,7 @@ bool DSE::HandleFree(CallInst *F) {
// free(s);
Dep = MD->getDependency(F);
};
-
+
return MadeChange;
}
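
At the source level, HandleFree walks the dependencies of a free() call and deletes stores that only a reader of the about-to-be-freed object could observe, as in this illustrative fragment:

    #include <cstdlib>

    void drop(int *s) {
      s[0] = 0; // dead: the object is freed before any read
      s[1] = 0; // dead for the same reason
      std::free(s);
    }
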
@@ -579,28 +581,28 @@ bool DSE::HandleFree(CallInst *F) {
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Keep track of all of the stack objects that are dead at the end of the
// function.
SmallPtrSet<Value*, 16> DeadStackObjects;
-
+
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeadStackObjects.insert(AI);
-
+
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
DeadStackObjects.insert(AI);
-
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
-
+
// If we find a store, check to see if it points into a dead stack value.
if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
@@ -609,10 +611,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Stores to stack values are valid candidates for removal.
if (DeadStackObjects.count(Pointer)) {
Instruction *Dead = BBI++;
-
+
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Object: " << *Pointer << '\n');
-
+
// DCE instructions only used to calculate that store.
DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
++NumFastStores;
@@ -620,7 +622,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
}
-
+
// Remove any dead non-memory-mutating instructions.
if (isInstructionTriviallyDead(BBI)) {
Instruction *Inst = BBI++;
@@ -629,55 +631,61 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
MadeChange = true;
continue;
}
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
DeadStackObjects.erase(A);
continue;
}
-
+
if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
continue;
-
+
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the call site touches it.
- AliasAnalysis::ModRefResult A =
+ AliasAnalysis::ModRefResult A =
AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
+
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
LiveAllocas.push_back(*I);
}
-
+
for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
E = LiveAllocas.end(); I != E; ++I)
DeadStackObjects.erase(*I);
-
+
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
return MadeChange;
-
+
continue;
}
-
+
AliasAnalysis::Location LoadedLoc;
-
+
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ if (!L->isUnordered()) // Be conservative with atomic/volatile load
+ break;
LoadedLoc = AA->getLocation(L);
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
LoadedLoc = AA->getLocation(V);
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
LoadedLoc = AA->getLocationForSource(MTI);
- } else {
- // Not a loading instruction.
+ } else if (!BBI->mayReadFromMemory()) {
+ // Instruction doesn't read memory. Note that stores that weren't removed
+ // above will hit this case.
continue;
+ } else {
+ // Unknown inst; assume it clobbers everything.
+ break;
}
// Remove any allocas from the DeadPointer set that are loaded, as this
@@ -689,7 +697,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (DeadStackObjects.empty())
break;
}
-
+
return MadeChange;
}
@@ -703,14 +711,14 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
return;
-
+
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
return;
}
-
+
SmallVector<Value*, 16> NowLive;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 3d3f17b..c0223d2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -92,7 +92,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
// Hash in all of the operands as pointers.
unsigned Res = 0;
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
if (CastInst *CI = dyn_cast<CastInst>(Inst))
Res ^= getHash(CI->getType());
@@ -185,7 +185,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
"Cannot value number calls with metadata operands");
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
}
// Mix in the opcode.
@@ -357,7 +357,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a non-volatile load, process it.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
- if (LI->isVolatile()) {
+ if (!LI->isSimple()) {
LastStore = 0;
continue;
}
@@ -437,7 +437,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
// Remember that this was the last store we saw for DSE.
- if (!SI->isVolatile())
+ if (SI->isSimple())
LastStore = SI;
}
}
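
The hash change above is defensive: shifting a 32-bit unsigned by 32 or more is undefined behavior in C++, and a call instruction can easily have that many operands. Masking the shift count keeps it in [0, 15] at the price of a few extra collisions. A minimal illustration:

    // Old form: "Hashes[i] << i" is undefined once i reaches the bit width
    // of unsigned. New form: the shift count is wrapped into [0, 15].
    unsigned hashOperands(const unsigned *Hashes, unsigned N) {
      unsigned Res = 0;
      for (unsigned i = 0; i != N; ++i)
        Res ^= Hashes[i] << (i & 0xF);
      return Res;
    }
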
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 87b7317..cbfdbcd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -41,12 +41,16 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace PatternMatch;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumGVNSimpl, "Number of instructions simplified");
+STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
@@ -63,7 +67,7 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
namespace {
struct Expression {
uint32_t opcode;
- const Type *type;
+ Type *type;
SmallVector<uint32_t, 4> varargs;
Expression(uint32_t o = ~2U) : opcode(o) { }
@@ -548,6 +552,9 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
+ BasicBlock *Root);
+ bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
};
char GVN::ID = 0;
@@ -655,7 +662,7 @@ SpeculationFailure:
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
- const Type *LoadTy,
+ Type *LoadTy,
const TargetData &TD) {
// If the loaded or stored value is an first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
@@ -680,17 +687,17 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
- const Type *LoadedTy,
+ Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
// If this is already the right type, just return it.
- const Type *StoredValTy = StoredVal->getType();
+ Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
+ uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
@@ -704,7 +711,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
- const Type *TypeToCastTo = LoadedTy;
+ Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
@@ -743,7 +750,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
}
// Truncate the integer to the right size now.
- const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
@@ -765,7 +772,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const TargetData &TD) {
@@ -839,7 +846,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
@@ -856,7 +863,7 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
-static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
LoadInst *DepLI, const TargetData &TD){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
@@ -883,7 +890,7 @@ static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
-static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
// If the mem operation is a non-constant size, we can't handle it.
@@ -920,7 +927,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
@@ -934,7 +941,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
/// mustalias. Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
- const Type *LoadTy,
+ Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
@@ -946,10 +953,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
- "tmp");
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
@@ -959,11 +965,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
if (ShiftAmt)
- SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
if (LoadSize != StoreSize)
- SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
- "tmp");
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
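
The shift computed above picks out which bytes of the previously stored integer feed the narrower load: on a little-endian target the bytes at byte offset Offset sit Offset*8 bits above the least significant bit, while on a big-endian target they sit at the opposite end. As a standalone sketch:

    // Offset, StoreSize and LoadSize in bytes; returns the right-shift in
    // bits that brings the loaded bytes down to the least significant end.
    static unsigned loadShiftAmount(bool IsLittleEndian, unsigned StoreSize,
                                    unsigned LoadSize, unsigned Offset) {
      return IsLittleEndian ? Offset * 8
                            : (StoreSize - LoadSize - Offset) * 8;
    }
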
@@ -974,7 +979,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
- const Type *LoadTy, Instruction *InsertPt,
+ Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
const TargetData &TD = *gvn.getTargetData();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
@@ -982,8 +987,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
- assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
- assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber and DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset+LoadSize;
@@ -996,7 +1001,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- const Type *DestPTy =
+ Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
cast<PointerType>(PtrVal->getType())->getAddressSpace());
@@ -1034,7 +1039,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- const Type *LoadTy, Instruction *InsertPt,
+ Type *LoadTy, Instruction *InsertPt,
const TargetData &TD){
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
@@ -1081,7 +1086,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1154,7 +1159,7 @@ struct AvailableValueInBlock {
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
@@ -1213,7 +1218,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- const Type *LoadTy = LI->getType();
+ Type *LoadTy = LI->getType();
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
@@ -1274,7 +1279,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
+ if (Deps.size() == 1
+ && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
+ {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1294,7 +1301,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
- if (DepInfo.isUnknown()) {
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1359,7 +1366,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
- assert(DepInfo.isDef() && "Expecting def here");
+ // DepInfo.isDef() here
Instruction *DepInst = DepInfo.getInst();
@@ -1446,8 +1453,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
- // Lets find first basic block with more than one predecessor. Walk backwards
- // through predecessors if needed.
+ // Let's find the first basic block with more than one predecessor. Walk
+ // backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
@@ -1519,10 +1526,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
+
+ if (LoadBB->isLandingPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
}
}
+
if (!NeedToSplit.empty()) {
toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
@@ -1660,7 +1676,7 @@ bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
- if (L->isVolatile())
+ if (!L->isSimple())
return false;
if (L->use_empty()) {
@@ -1747,7 +1763,11 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- if (Dep.isUnknown()) {
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ if (!Dep.isDef()) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -1757,12 +1777,6 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- // If it is defined in another block, try harder.
- if (Dep.isNonLocal())
- return processNonLocalLoad(L);
-
- assert(Dep.isDef() && "Expecting def here");
-
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
@@ -1874,6 +1888,133 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
return Val;
}
+/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
+/// use is dominated by the given basic block. Returns the number of uses that
+/// were replaced.
+unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
+ BasicBlock *Root) {
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ) {
+ Instruction *User = cast<Instruction>(*UI);
+ unsigned OpNum = UI.getOperandNo();
+ ++UI;
+
+ if (DT->dominates(Root, User->getParent())) {
+ User->setOperand(OpNum, To);
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+/// propagateEquality - The given values are known to be equal in every block
+/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
+/// 'RHS' everywhere in the scope. Returns whether a change was made.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+ if (LHS == RHS) return false;
+ assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS))
+ return false;
+
+ // Make sure that any constants are on the right-hand side. In general the
+ // best results are obtained by placing the longest lived value on the RHS.
+ if (isa<Constant>(LHS))
+ std::swap(LHS, RHS);
+
+ // If neither term is constant then bail out. This is not for correctness,
+ // it's just that the non-constant case is much less useful: it occurs just
+ // as often as the constant case but handling it hardly ever results in an
+ // improvement.
+ if (!isa<Constant>(RHS))
+ return false;
+
+ // If value numbering later deduces that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'.
+ addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root);
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ bool Changed = NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ return Changed;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ return Changed;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Changed |= propagateEquality(A, RHS, Root);
+ Changed |= propagateEquality(B, RHS, Root);
+ return Changed;
+ }
+
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B.
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ // Only equality comparisons are supported.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
+ Changed |= propagateEquality(Op0, Op1, Root);
+ }
+ return Changed;
+ }
+
+ return Changed;
+}
+
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
+ DominatorTree *DT) {
+ // First off, there must not be more than one edge from Src to Dst, there
+ // should be exactly one. So keep track of the number of times Src occurs
+ // as a predecessor of Dst and fail if it's more than once. Secondly, any
+ // other predecessors of Dst should be dominated by Dst (see logic below).
+ bool SawEdgeFromSrc = false;
+ for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (Pred == Src) {
+ // An edge from Src to Dst.
+ if (SawEdgeFromSrc)
+ // There are multiple edges from Src to Dst - fail.
+ return false;
+ SawEdgeFromSrc = true;
+ continue;
+ }
+ // If the predecessor is not dominated by Dst, then it must be possible to
+ // reach it either without passing through Src (and thus not via the edge)
+ // or by passing through Src but taking a different edge out of Src. Either
+ // way it is possible to reach Dst without passing via the edge, so fail.
+ if (!DT->dominates(Dst, *PI))
+ return false;
+ }
+ assert(SawEdgeFromSrc && "No edge between these basic blocks!");
+
+ // Every path from the entry block to Dst must at some point pass to Dst from
+ // a predecessor that is not dominated by Dst. This predecessor can only be
+ // Src, since all others are dominated by Dst. As there is only one edge from
+ // Src to Dst, the path passes by this edge.
+ return true;
+}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
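
Together, propagateEquality and isOnlyReachableViaThisEdge let GVN exploit a branch condition throughout the region its edge dominates, including facts recovered from boolean structure ("A && B" known true implies both operands are true). An illustrative source-level consequence, assuming the front end emits a single i1 'and' for the condition:

    int f(int a, int b) {
      if (a == 1 && b == 2)
        return a + b; // a and b are known here; GVN can fold this to 3
      return 0;
    }
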
@@ -1891,6 +2032,7 @@ bool GVN::processInstruction(Instruction *I) {
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
+ ++NumGVNSimpl;
return true;
}
@@ -1903,30 +2045,45 @@ bool GVN::processInstruction(Instruction *I) {
return false;
}
- // For conditions branches, we can perform simple conditional propagation on
+ // For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
- uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
- if (TrueSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
- if (FalseSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getFalse(TrueSucc->getContext()),
- FalseSucc);
-
- return false;
+ BasicBlock *Parent = BI->getParent();
+ bool Changed = false;
+
+ if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
+
+ if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getFalse(FalseSucc->getContext()),
+ FalseSucc);
+
+ return Changed;
}
-
+
+ // For switches, propagate the case values into the case destinations.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ Value *SwitchCond = SI->getCondition();
+ BasicBlock *Parent = SI->getParent();
+ bool Changed = false;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ BasicBlock *Dst = SI->getSuccessor(i);
+ if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
+ Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ }
+ return Changed;
+ }
+
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
if (I->getType()->isVoidTy()) return false;
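
The switch handling mirrors the branch case: each case destination that is reachable only via its case edge learns the exact value of the switch condition. For example:

    int g(int x) {
      switch (x) {
      case 5:
        return x * 2; // x is known to be 5 here, so this folds to 10
      default:
        return 0;
      }
    }
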
@@ -2071,6 +2228,9 @@ bool GVN::performPRE(Function &F) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
+ // Don't perform PRE on a landing pad.
+ if (CurrentBlock->isLandingPad()) continue;
+
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index dee3d38..75fa011 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -11,17 +11,6 @@
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
-// This transformation makes the following changes to each loop with an
-// identifiable induction variable:
-// 1. All loops are transformed to have a SINGLE canonical induction variable
-// which starts at zero and steps by one.
-// 2. The canonical induction variable is guaranteed to be the first PHI node
-// in the loop header block.
-// 3. The canonical induction variable is guaranteed to be in a wide enough
-// type so that IV expressions need not be (directly) zero-extended or
-// sign-extended.
-// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
-//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
// 1. The exit condition for the loop is canonicalized to compare the
@@ -33,9 +22,6 @@
// purpose of the loop is to compute the exit value of some derived
// expression, this transformation will make the loop dead.
//
-// This transformation should be followed by strength reduction after all of the
-// desired loop transformations have been performed.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "indvars"
@@ -57,11 +43,11 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumRemoved , "Number of aux indvars removed");
@@ -69,15 +55,21 @@ STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-static cl::opt<bool> DisableIVRewrite(
- "disable-iv-rewrite", cl::Hidden,
- cl::desc("Disable canonical induction variable rewriting"));
+namespace llvm {
+ cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+ // Trip count verification can be enabled by default under NDEBUG if we
+ // implement a strong expression equivalence checker in SCEV. Until then, we
+ // use the verify-indvars flag, which may assert in some cases.
+ cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
+}
namespace {
class IndVarSimplify : public LoopPass {
@@ -105,12 +97,12 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -125,24 +117,14 @@ namespace {
void HandleFloatingPointIV(Loop *L, PHINode *PH);
void RewriteNonIntegerIVs(Loop *L);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void SimplifyIVUsers(SCEVExpander &Rewriter);
- void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+ void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
- bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned);
-
- void SimplifyCongruentIVs(Loop *L);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
- ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter);
+ Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
};
@@ -211,6 +193,36 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = 0;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+ assert(InsertPt && "Missing phi operand");
+ assert((!isa<Instruction>(Def) ||
+ DT->dominates(cast<Instruction>(Def), InsertPt)) &&
+ "def does not dominate all uses");
+ return InsertPt;
+}
+
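
The nearest-common-dominator fold above is the heart of getInsertPointForUses. A minimal standalone sketch of that step, using toy blocks with parent pointers and depths instead of LLVM's DominatorTree, so every name here is illustrative:

  #include <cassert>

  struct Block { Block *IDom; int Depth; };

  // Lift both nodes to equal depth, then climb in lockstep until they meet.
  static Block *nearestCommonDominator(Block *A, Block *B) {
    while (A->Depth > B->Depth) A = A->IDom;
    while (B->Depth > A->Depth) B = B->IDom;
    while (A != B) { A = A->IDom; B = B->IDom; }
    return A;
  }

  int main() {
    Block Entry{nullptr, 0}, Left{&Entry, 1}, Right{&Entry, 1};
    // A value flowing into a phi from both Left and Right must be expanded
    // at a point dominating both incoming blocks, i.e. in Entry.
    assert(nearestCommonDominator(&Left, &Right) == &Entry);
    return 0;
  }
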
//===----------------------------------------------------------------------===//
// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
@@ -337,14 +349,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Positive and negative strides have different safety conditions.
if (IncValue > 0) {
// If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ // exit value.
+ if (InitValue >= ExitValue)
return;
uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i <= Exit) or until (i > Exit)
+ if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
if (++Range == 0) return; // Range overflows.
}
@@ -364,14 +376,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
} else {
// If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ // exit value.
+ if (InitValue <= ExitValue)
return;
uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i >= Exit) or until (i < Exit)
+ if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
if (++Range == 0) return; // Range overflows.
}
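
Both stride cases above hinge on the same range computation. A standalone model of it, mirroring the int64_t init/exit values and uint32_t range that HandleFloatingPointIV works with; it assumes a positive unit stride and shows why an inclusive test bumps the range by one and why a wrap to zero means the loop never exits:

  #include <cassert>
  #include <cstdint>

  // Models "while (i <= Exit)" (Inclusive) vs "while (i < Exit)" over i32.
  static bool hasFiniteRange(int64_t Init, int64_t Exit, bool Inclusive) {
    if (Init >= Exit) return false;       // positive stride needs Init < Exit
    uint32_t Range = uint32_t(Exit - Init);
    if (Inclusive && ++Range == 0)        // i <= INT32_MAX can never exit
      return false;
    return true;
  }

  int main() {
    assert(hasFiniteRange(0, 10, true));                 // 11 iterations
    assert(!hasFiniteRange(INT32_MIN, INT32_MAX, true)); // range wraps to 0
    return 0;
  }
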
@@ -390,7 +402,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
return;
}
- const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
// Insert new integer induction variable.
PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
@@ -429,7 +441,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
+ PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
@@ -437,6 +449,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Add a new IVUsers entry for the newly-created integer PHI.
if (IU)
IU->AddUsersIfInteresting(NewPHI);
+
+ Changed = true;
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
@@ -582,45 +596,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
// Rewrite IV users based on a canonical IV.
-// To be replaced by -disable-iv-rewrite.
+// Only for use with -enable-iv-rewrite.
//===----------------------------------------------------------------------===//
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyIVUsersNoRewrite.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- continue;
- }
- }
- }
-}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
+/// FIXME: It is an extremely bad idea to indvar substitute anything more
+/// complex than affine induction variables. Doing so will put expensive
+/// polynomial evaluations inside of the loop, and the str reduction pass
+/// currently can only reduce affine polynomials. For now just disable
+/// indvar subst on anything more complex than an affine addrec, unless
+/// it can be expanded to a trivial value.
static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
if (SE->isLoopInvariant(S, L)) return true;
@@ -631,7 +615,8 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
return AR->isAffine();
// An add is safe if all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ if (const SCEVCommutativeExpr *Commutative
+ = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
if (!isSafe(*I, L, SE)) return false;
@@ -665,7 +650,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// of different sizes.
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
+ Type *UseTy = Op->getType();
Instruction *User = UI->getUser();
// Compute the final addrec to expand into code.
@@ -692,18 +677,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// hoist loop invariants out of the loop. For PHI nodes, there may be
// multiple uses, so compute the nearest common dominator for the
// incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
+ Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
// Now expand it into actual Instructions and patch it into place.
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
@@ -747,19 +721,38 @@ namespace {
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
- const Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was an sext user seen before a zext?
+ PHINode *NarrowIV;
+ Type *WidestNativeType; // Widest integer type created [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ };
+
+ class WideIVVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetData *TD;
+
+ public:
+ WideIVInfo WI;
+
+ WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
+ const TargetData *TData) :
+ SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ virtual void visitCast(CastInst *Cast);
};
}
-/// CollectExtend - Update information about the induction variable that is
+/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
- ScalarEvolution *SE, const TargetData *TD) {
- const Type *Ty = Cast->getType();
+void WideIVVisitor::visitCast(CastInst *Cast) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
+ return;
+
+ Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
@@ -779,6 +772,21 @@ static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
}
namespace {
+
+/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
+/// WideIV that computes the same value as the Narrow IV def. This avoids
+/// caching Use* pointers.
+struct NarrowIVDefUse {
+ Instruction *NarrowDef;
+ Instruction *NarrowUse;
+ Instruction *WideDef;
+
+ NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
+ NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+};
+
/// WidenIV - The goal of this transform is to remove sign and zero extends
/// without creating any new induction variables. To do this, it creates a new
/// phi of the wider type and redirects all users, either removing extends or
@@ -787,7 +795,7 @@ namespace {
class WidenIV {
// Parameters
PHINode *OrigPhi;
- const Type *WideType;
+ Type *WideType;
bool IsSigned;
// Context
@@ -803,13 +811,13 @@ class WidenIV {
SmallVectorImpl<WeakVH> &DeadInsts;
SmallPtrSet<Instruction*,16> Widened;
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
- OrigPhi(PN),
+ OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
IsSigned(WI.IsSigned),
LI(LInfo),
@@ -826,21 +834,42 @@ public:
PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
- Instruction *CloneIVUser(Instruction *NarrowUse,
- Instruction *NarrowDef,
- Instruction *WideDef);
+ Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *CloneIVUser(NarrowIVDefUse DU);
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
- Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
- Instruction *WideDef);
+ const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ Instruction *WidenIVUse(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-static Value *getExtend( Value *NarrowOper, const Type *WideType,
- bool IsSigned, IRBuilder<> &Builder) {
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
Builder.CreateZExt(NarrowOper, WideType);
}
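
The loop in getExtend above hoists the cast's insertion point outward while the operand stays invariant. A compact model of just that walk, with strings standing in for blocks and a flag standing in for the isLoopInvariant query (all names illustrative):

  #include <cassert>
  #include <string>

  struct Loop {
    std::string Preheader;  // empty means no preheader
    Loop *Parent;
    bool OperInvariant;     // stands in for isLoopInvariant(NarrowOper, L, DT)
  };

  // Hoist the insertion point outward while a preheader exists and the
  // operand is still invariant in the enclosing loop.
  static std::string hoistInsertPoint(const Loop *L, std::string InsertPt) {
    for (; L && !L->Preheader.empty() && L->OperInvariant; L = L->Parent)
      InsertPt = L->Preheader;
    return InsertPt;
  }

  int main() {
    Loop Outer{"outer.preheader", nullptr, true};
    Loop Inner{"inner.preheader", &Outer, true};
    // A cast needed in the inner loop body lands in the outermost preheader.
    assert(hoistInsertPoint(&Inner, "inner.body") == "outer.preheader");
    return 0;
  }
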
@@ -848,10 +877,8 @@ static Value *getExtend( Value *NarrowOper, const Type *WideType,
/// CloneIVUser - Instantiate a wide operation to replace a narrow
/// operation. This only needs to handle operations that can evaluate to
/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
-Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
- Instruction *NarrowDef,
- Instruction *WideDef) {
- unsigned Opcode = NarrowUse->getOpcode();
+Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
return 0;
@@ -865,24 +892,23 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
-
- IRBuilder<> Builder(NarrowUse);
+ DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
// Replace NarrowDef operands with WideDef. Otherwise, we don't know
// anything about the narrow operand yet so must insert a [sz]ext. It is
// probably loop invariant and will be folded or hoisted. If it actually
// comes from a widened IV, it should be removed during a future call to
// WidenIVUse.
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
- getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
- getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+ Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
+ Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
- BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
LHS, RHS,
NarrowBO->getName());
+ IRBuilder<> Builder(DU.NarrowUse);
Builder.Insert(WideBO);
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
@@ -894,45 +920,46 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
llvm_unreachable(0);
}
-/// HoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT)
-{
- if (DT->dominates(IncV, InsertPos))
- return true;
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return it.
+const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ if (DU.NarrowUse->getOpcode() != Instruction::Add)
+ return 0;
- if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
- return false;
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = 0;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ if (IsSigned && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if(!IsSigned && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return 0;
- if (IncV->mayHaveSideEffects())
- return false;
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
+ IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
- // Attempt to hoist IncV
- for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
- OI != OE; ++OI) {
- Instruction *OInst = dyn_cast<Instruction>(OI);
- if (OInst && !DT->dominates(OInst, InsertPos))
- return false;
- }
- IncV->moveBefore(InsertPos);
- return true;
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+ return AddRec;
}
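
GetExtendedOperandRecurrence rests on the algebraic fact that a no-wrap add lets extension distribute over its operands. A quick numeric check of the signed case in plain C++, assuming two's complement narrowing:

  #include <cassert>
  #include <cstdint>

  static int32_t sext(int8_t V) { return V; }  // i8 -> i32 sign extension

  int main() {
    // No signed wrap in i8: extension distributes over the add.
    int8_t A = 100, B = 20;                    // 120 fits in i8
    assert(sext(int8_t(A + B)) == sext(A) + sext(B));

    // Signed wrap in i8: the identity breaks (200 wraps to -56).
    int8_t C = 100, D = 100;
    assert(sext(int8_t(C + D)) != sext(C) + sext(D));
    return 0;
  }
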
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
+/// GetWideRecurrence - Is this instruction potentially interesting from
+/// IVUsers' perspective after widening its type? In other words, can the
+/// extend be safely hoisted out of the loop with SCEV reducing the value to a
+/// recurrence on the same loop? If so, return the sign or zero extended
+/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return 0;
@@ -951,47 +978,45 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
return 0;
-
return AddRec;
}
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
- Instruction *WideDef) {
- Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+ if (isa<PHINode>(DU.NarrowUse) &&
+ LI->getLoopFor(DU.NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
- if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
- Value *NewDef = WideDef;
- if (NarrowUse->getType() != WideType) {
- unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowDefUse);
- NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
}
else {
// A wider extend was hidden behind a narrower one. This may induce
// another round of IV widening in which the intermediate IV becomes
// dead. It should be very rare.
DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
- << " not wide enough to subsume " << *NarrowUse << "\n");
- NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
- NewDef = NarrowUse;
+ << " not wide enough to subsume " << *DU.NarrowUse << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
}
}
- if (NewDef != NarrowUse) {
- DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
- << " replaced by " << *WideDef << "\n");
+ if (NewDef != DU.NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
++NumElimExt;
- NarrowUse->replaceAllUsesWith(NewDef);
- DeadInsts.push_back(NarrowUse);
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(DU.NarrowUse);
}
// Now that the extend is gone, we want to expose it's uses for potential
// further simplification. We don't need to directly inform SimplifyIVUsers
@@ -1004,29 +1029,32 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ if (!WideAddRec) {
+ WideAddRec = GetExtendedOperandRecurrence(DU);
+ }
if (!WideAddRec) {
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowDefUse);
- Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
- NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
return 0;
}
- // We assume that block terminators are not SCEVable. We wouldn't want to
+ // Assume block terminators cannot evaluate to a recurrence. We can't
// insert a Trunc after a terminator if there happens to be a critical edge.
- assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
"SCEV is not expected to evaluate a block terminator");
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
- if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+ if (WideAddRec == WideIncExpr
+ && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
WideUse = WideInc;
- }
else {
- WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+ WideUse = CloneIVUser(DU);
if (!WideUse)
return 0;
}
@@ -1051,13 +1079,13 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
for (Value::use_iterator UI = NarrowDef->use_begin(),
UE = NarrowDef->use_end(); UI != UE; ++UI) {
- Use &U = UI.getUse();
+ Instruction *NarrowUse = cast<Instruction>(*UI);
// Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(cast<Instruction>(U.getUser())))
+ if (!Widened.insert(NarrowUse))
continue;
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
}
}
@@ -1124,23 +1152,19 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
pushNarrowIVUsers(OrigPhi, WidePhi);
while (!NarrowIVUsers.empty()) {
- Use *UsePtr;
- Instruction *WideDef;
- tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
- Use &NarrowDefUse = *UsePtr;
+ NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
- Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
+ Instruction *WideUse = WidenIVUse(DU);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
- pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
// WidenIVUse may have removed the def-use edge.
- if (NarrowDef->use_empty())
- DeadInsts.push_back(NarrowDef);
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.push_back(DU.NarrowDef);
}
return WidePhi;
}
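
CreateWideIV drives WidenIVUse from a worklist of NarrowIVDefUse records precisely so that replacing a use cannot invalidate queued work, as it could with cached Use pointers. A stripped-down model of the traversal, with ints standing in for instructions:

  #include <cstdio>
  #include <vector>

  struct NarrowIVDefUse { int NarrowDef, NarrowUse, WideDef; };

  int main() {
    std::vector<NarrowIVDefUse> Worklist;
    Worklist.push_back({0, 1, 100});        // seed: phi(0) -> first user(1)
    while (!Worklist.empty()) {
      NarrowIVDefUse DU = Worklist.back();  // copied out; safe to mutate queue
      Worklist.pop_back();
      int WideUse = DU.NarrowUse + 100;     // stands in for WidenIVUse(DU)
      std::printf("widened %d -> %d\n", DU.NarrowUse, WideUse);
      if (DU.NarrowUse < 3)                 // follow the next def-use edge
        Worklist.push_back({DU.NarrowUse, DU.NarrowUse + 1, WideUse});
    }
    return 0;
  }
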
@@ -1149,187 +1173,17 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
-void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
- unsigned IVOperIdx = 0;
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- if (IVOperand != ICmp->getOperand(0)) {
- // Swapped
- assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
- IVOperIdx = 1;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
- const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
- return;
-
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- ++NumElimCmp;
- Changed = true;
- DeadInsts.push_back(ICmp);
-}
-
-void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned) {
- // We're only interested in the case where we know something about
- // the numerator.
- if (IVOperand != Rem->getOperand(0))
- return;
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(Rem->getOperand(0));
- const SCEV *X = SE->getSCEV(Rem->getOperand(1));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // i % n --> i if i is in [0,n).
- if ((!IsSigned || SE->isKnownNonNegative(S)) &&
- SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- S, X))
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- else {
- // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
- if (IsSigned && !SE->isKnownNonNegative(LessOne))
- return;
-
- if (!SE->isKnownPredicate(IsSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- LessOne, X))
- return;
-
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
- Rem->getOperand(0), Rem->getOperand(1),
- "tmp");
- SelectInst *Sel =
- SelectInst::Create(ICmp,
- ConstantInt::get(Rem->getType(), 0),
- Rem->getOperand(0), "tmp", Rem);
- Rem->replaceAllUsesWith(Sel);
- }
-
- // Inform IVUsers about the new users.
- if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
- }
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.push_back(Rem);
-}
-
-/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
-/// no observable side-effect given the range of IV values.
-bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
- Instruction *IVOperand) {
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- return true;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- return true;
- }
- }
-
- // Eliminate any operation that SCEV can prove is an identity function.
- if (!SE->isSCEVable(UseInst->getType()) ||
- (UseInst->getType() != IVOperand->getType()) ||
- (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
- return false;
-
- DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
-
- UseInst->replaceAllUsesWith(IVOperand);
- ++NumElimIdentity;
- Changed = true;
- DeadInsts.push_back(UseInst);
- return true;
-}
-
-/// pushIVUsers - Add all uses of Def to the current IV's worklist.
-///
-static void pushIVUsers(
- Instruction *Def,
- SmallPtrSet<Instruction*,16> &Simplified,
- SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
-
- for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // Avoid infinite or exponential worklist processing.
- // Also ensure unique worklist users.
- // If Def is a LoopPhi, it may not be in the Simplified set, so check for
- // self edges first.
- if (User != Def && Simplified.insert(User))
- SimpleIVUsers.push_back(std::make_pair(User, Def));
- }
-}
-
-/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
-/// expression in terms of that IV.
-///
-/// This is similar to IVUsers' isInsteresting() but processes each instruction
-/// non-recursively when the operand is already known to be a simpleIVUser.
-///
-static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
- if (!SE->isSCEVable(I->getType()))
- return false;
-
- // Get the symbolic expression for this instruction.
- const SCEV *S = SE->getSCEV(I);
-
- // We assume that terminators are not SCEVable.
- assert((!S || I != I->getParent()->getTerminator()) &&
- "can't fold terminators");
-
- // Only consider affine recurrences.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (AR && AR->getLoop() == L)
- return true;
-
- return false;
-}
-/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
-/// of IV users. Each successive simplification may push more users which may
+/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
+/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
-/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
-/// simplifies instructions in-place during analysis. Rather than rewriting
-/// induction variables bottom-up from their users, it transforms a chain of
-/// IVUsers top-down, updating the IR only when it encouters a clear
-/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
-/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
-/// extend elimination.
+/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
-///
-void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
- std::map<PHINode *, WideIVInfo> WideIVMap;
+void IndVarSimplify::SimplifyAndExtend(Loop *L,
+ SCEVExpander &Rewriter,
+ LPPassManager &LPM) {
+ SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
@@ -1345,108 +1199,81 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
// evaluation of sign/zero extension until needed, and avoid running
- // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+ // other SCEV based analysis prior to SimplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVInfo WI;
-
- // Instructions processed by SimplifyIVUsers for CurrIV.
- SmallPtrSet<Instruction*,16> Simplified;
-
- // Use-def pairs if IV users waiting to be processed for CurrIV.
- SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
-
- // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
- // called multiple times for the same LoopPhi. This is the proper thing to
- // do for loop header phis that use each other.
- pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+ WideIVVisitor WIV(CurrIV, SE, TD);
- while (!SimpleIVUsers.empty()) {
- Instruction *UseInst, *Operand;
- tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
- // Bypass back edges to avoid extra work.
- if (UseInst == CurrIV) continue;
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
- if (EliminateIVUser(UseInst, Operand)) {
- pushIVUsers(Operand, Simplified, SimpleIVUsers);
- continue;
- }
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, IsSigned, WI, SE, TD);
- }
- continue;
- }
- if (isSimpleIVUser(UseInst, L, SE)) {
- pushIVUsers(UseInst, Simplified, SimpleIVUsers);
- }
- }
- if (WI.WidestNativeType) {
- WideIVMap[CurrIV] = WI;
+ if (WIV.WI.WidestNativeType) {
+ WideIVs.push_back(WIV.WI);
}
} while(!LoopPhis.empty());
- for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
- E = WideIVMap.end(); I != E; ++I) {
- WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ for (; !WideIVs.empty(); WideIVs.pop_back()) {
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
}
- WideIVMap.clear();
}
}
-/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
-/// populate ExprToIVMap for use later.
-///
-void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
- DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- PHINode *Phi = cast<PHINode>(I);
- if (!SE->isSCEVable(Phi->getType()))
- continue;
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
- const SCEV *S = SE->getSCEV(Phi);
- DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
- bool Inserted;
- tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
- if (Inserted)
- continue;
- PHINode *OrigPhi = Pos->second;
- // Replacing the congruent phi is sufficient because acyclic redundancy
- // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
- // that a phi is congruent, it's almost certain to be the head of an IV
- // user cycle that is isomorphic with the original phi. So it's worth
- // eagerly cleaning up the common case of a single IV increment.
- if (BasicBlock *LatchBlock = L->getLoopLatch()) {
- Instruction *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- Instruction *IsomorphicInc =
- cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
- if (OrigInc != IsomorphicInc &&
- SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
- HoistStep(OrigInc, IsomorphicInc, DT)) {
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
- << *IsomorphicInc << '\n');
- IsomorphicInc->replaceAllUsesWith(OrigInc);
- DeadInsts.push_back(IsomorphicInc);
- }
+/// Check for expressions that ScalarEvolution generates to compute
+/// BackedgeTakenInfo. If these expressions have not been reduced, then
+/// expanding them may incur additional cost (albeit in the loop preheader).
+static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ ScalarEvolution *SE) {
+ // If the backedge-taken count is a UDiv, it's very likely a UDiv that
+ // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
+ // precise expression, rather than a UDiv from the user's code. If we can't
+ // find a UDiv in the code with some simple searching, assume the former and
+ // forego rewriting the loop.
+ if (isa<SCEVUDivExpr>(S)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return true;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != S) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != S)
+ return true;
}
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
- ++NumElimIV;
- Phi->replaceAllUsesWith(OrigPhi);
- DeadInsts.push_back(Phi);
}
-}
-//===----------------------------------------------------------------------===//
-// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
-//===----------------------------------------------------------------------===//
+ if (EnableIVRewrite)
+ return false;
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, BI, SE))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an expression
+ // produced by program code.
+ return false;
+}
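
isHighCostExpansion is essentially a recursive classifier over SCEV node kinds. A self-contained caricature of that policy on a toy expression tree; it omits the UDiv source-matching and the EnableIVRewrite early exit above:

  #include <cassert>
  #include <vector>

  struct Expr {
    enum Kind { Const, Add, UDiv, Max } K;
    std::vector<const Expr *> Ops;
  };

  static bool isHighCost(const Expr *E) {
    switch (E->K) {
    case Expr::UDiv:                 // likely SCEV-manufactured division
    case Expr::Max:                  // max from an unguarded loop bound
      return true;
    case Expr::Add:                  // cheap to rematerialize; recurse
      for (const Expr *Op : E->Ops)
        if (isHighCost(Op))
          return true;
      return false;
    default:                         // assume it already exists in the code
      return false;
    }
  }

  int main() {
    Expr C{Expr::Const, {}}, Div{Expr::UDiv, {}};
    Expr Cheap{Expr::Add, {&C, &C}};
    Expr Costly{Expr::Add, {&C, &Div}};
    assert(!isHighCost(&Cheap) && isHighCost(&Costly));
    return 0;
  }
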
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
@@ -1465,31 +1292,17 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ return false;
+
return true;
}
/// getBackedgeIVType - Get the widest type used by the loop test after peeking
/// through Truncs.
///
-/// TODO: Unnecessary if LFTR does not force a canonical IV.
-static const Type *getBackedgeIVType(Loop *L) {
+/// TODO: Unnecessary when ForceLFTR is removed.
+static Type *getBackedgeIVType(Loop *L) {
if (!L->getExitingBlock())
return 0;
@@ -1502,7 +1315,7 @@ static const Type *getBackedgeIVType(Loop *L) {
if (!Cond)
return 0;
- const Type *Ty = 0;
+ Type *Ty = 0;
for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
OI != OE; ++OI) {
assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
@@ -1515,12 +1328,187 @@ static const Type *getBackedgeIVType(Loop *L) {
return Ty;
}
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
+/// invariant value to the phi.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+ Instruction *IncI = dyn_cast<Instruction>(IncV);
+ if (!IncI)
+ return 0;
+
+ switch (IncI->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ break;
+ case Instruction::GetElementPtr:
+ // An IV counter must preserve its type.
+ if (IncI->getNumOperands() == 2)
+ break;
+ default:
+ return 0;
+ }
+
+ PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ return Phi;
+ return 0;
+ }
+ if (IncI->getOpcode() == Instruction::GetElementPtr)
+ return 0;
+
+ // Allow add/sub to be commuted.
+ Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ return Phi;
+ }
+ return 0;
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+ assert(L->getExitingBlock() && "expected loop exit");
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ // Don't bother with LFTR if the loop is not properly simplified.
+ if (!LatchBlock)
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ assert(BI && "expected exit branch");
+
+ // Do LFTR to simplify the exit condition to an ICMP.
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return true;
+
+ // Do LFTR to simplify the exit ICMP to EQ/NE
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return true;
+
+ // Look for a loop invariant RHS
+ Value *LHS = Cond->getOperand(0);
+ Value *RHS = Cond->getOperand(1);
+ if (!isLoopInvariant(RHS, L, DT)) {
+ if (!isLoopInvariant(LHS, L, DT))
+ return true;
+ std::swap(LHS, RHS);
+ }
+ // Look for a simple IV counter LHS
+ PHINode *Phi = dyn_cast<PHINode>(LHS);
+ if (!Phi)
+ Phi = getLoopPhiForCounter(LHS, L, DT);
+
+ if (!Phi)
+ return true;
+
+ // Do LFTR if the exit condition's IV is *not* a simple counter.
+ Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+ return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
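
needsLFTR reads as a chain of canonicality checks, each of which forces a rewrite when it fails. A trivial standalone mirror of that chain over a toy exit-test descriptor (field names invented for illustration):

  #include <cassert>

  struct ExitTest {
    bool IsICmp;             // condition is an integer compare
    bool IsEqNe;             // predicate is already EQ or NE
    bool RHSInvariant;       // one side is loop-invariant (after swapping)
    bool LHSIsSimpleCounter; // other side is a phi fed by a simple increment
  };

  // Rewrite unless every canonicality test already passes.
  static bool needsRewrite(const ExitTest &T) {
    if (!T.IsICmp) return true;
    if (!T.IsEqNe) return true;
    if (!T.RHSInvariant) return true;
    if (!T.LHSIsSimpleCounter) return true;
    return false;
  }

  int main() {
    assert(!needsRewrite({true, true, true, true})); // already canonical
    assert(needsRewrite({true, false, true, true})); // e.g. an "i < n" test
    return 0;
  }
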
+
+/// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
+/// be rewritten) loop exit test and its own increment.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+ for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != IncV) return false;
+ }
+
+ for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != Phi) return false;
+ }
+ return true;
+}
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+ // I'm not sure how BECount could be a pointer type, but we definitely don't
+ // want to LFTR that.
+ if (BECount->getType()->isPointerTy())
+ return 0;
+
+ uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+ Value *Cond =
+ cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+ // Loop over all of the PHI nodes, looking for a simple counter.
+ PHINode *BestPhi = 0;
+ const SCEV *BestInit = 0;
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ continue;
+
+ // AR may be a pointer type, while BECount is an integer type.
+ // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+ // AR may not be a narrower type, or we may never exit.
+ uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+ if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ continue;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ continue;
+
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+ continue;
+
+ const SCEV *Init = AR->getStart();
+
+ if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+ // Don't force a live loop counter if another IV can be used.
+ if (AlmostDeadIV(Phi, LatchBlock, Cond))
+ continue;
+
+ // Prefer to count from zero. This is a more "canonical" counter form. It
+ // also prefers integer to pointer IVs.
+ if (BestInit->isZero() != Init->isZero()) {
+ if (BestInit->isZero())
+ continue;
+ }
+ // If two IVs both count from zero or both count from nonzero then the
+ // narrower is likely a dead phi that has been widened. Use the wider phi
+ // to allow the other to be eliminated.
+ if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ continue;
+ }
+ BestPhi = Phi;
+ BestInit = Init;
+ }
+ return BestPhi;
+}
+
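
The tie-breaking at the end of FindLoopCounter can be modeled in isolation: a zero-start candidate beats a nonzero-start one, and otherwise the wider phi wins. A condensed sketch of that selection, leaving out the AlmostDeadIV liveness screen and the SCEV-based screening:

  #include <cassert>
  #include <vector>

  struct Candidate { bool StartsAtZero; unsigned Width; };

  static const Candidate *pickCounter(const std::vector<Candidate> &Phis) {
    const Candidate *Best = nullptr;
    for (const Candidate &C : Phis) {
      if (Best) {
        if (Best->StartsAtZero != C.StartsAtZero) {
          // A count-from-zero IV is the more canonical choice.
          if (Best->StartsAtZero)
            continue;
        } else if (C.Width <= Best->Width) {
          // Same start kind: keep the wider phi so the narrower, probably
          // already-widened duplicate can be eliminated.
          continue;
        }
      }
      Best = &C;
    }
    return Best;
  }

  int main() {
    std::vector<Candidate> Phis = {{false, 64}, {true, 32}, {true, 64}};
    const Candidate *Best = pickCounter(Phis);
    assert(Best->StartsAtZero && Best->Width == 64);
    return 0;
  }
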
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
+Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
@@ -1528,62 +1516,117 @@ LinearFunctionTestReplace(Loop *L,
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // LFTR can ignore IV overflow and truncate to the width of
+ // BECount. This avoids materializing the add(zext(add)) expression.
+ Type *CntTy = !EnableIVRewrite ?
+ BackedgeTakenCount->getType() : IndVar->getType();
+
+ const SCEV *IVLimit = BackedgeTakenCount;
+
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
+ if (CntTy == IVLimit->getType())
+ IVLimit = N;
+ else {
+ const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+ } else {
+ // Potential overflow. Cast before doing the add.
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+ IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+ }
}
-
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
// We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
CmpIndVar = IndVar;
}
+ // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
+ // So for a non-zero start, compute the IVLimit here.
+ bool isPtrIV = false;
+ Type *CmpTy = CntTy;
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ if (!AR->getStart()->isZero()) {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For pointer types, sign extend BECount in order to materialize a GEP.
+ // Note that without EnableIVRewrite, we never run SCEVExpander on a
+ // pointer type, because we must preserve the existing GEPs. Instead we
+ // directly generate a GEP later.
+ if (IVInit->getType()->isPointerTy()) {
+ isPtrIV = true;
+ CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
+ IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
+ }
+ // For integer types, truncate the IV before computing IVInit + BECount.
+ else {
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(CmpTy))
+ IVInit = SE->getTruncateExpr(IVInit, CmpTy);
+
+ IVLimit = SE->getAddExpr(IVInit, IVLimit);
+ }
+ }
// Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
+ IRBuilder<> Builder(BI);
+
+ assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
+
+ // Create a gep for IVInit + IVLimit from an existing pointer base.
+ assert(isPtrIV == IndVar->getType()->isPointerTy() &&
+ "IndVar type must match IVInit type");
+ if (isPtrIV) {
+ Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
+ assert(SE->getSizeOfExpr(
+ cast<PointerType>(IVStart->getType())->getElementType())->isOne()
+ && "unit stride pointer IV must be i8*");
+
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+ ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
+ Builder.SetInsertPoint(BI);
+ }
// Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
+ ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
+ P = ICmpInst::ICMP_NE;
else
- Opcode = ICmpInst::ICMP_EQ;
+ P = ICmpInst::ICMP_EQ;
DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << " Expr:\t" << *IVLimit << "\n");
+
+ if (SE->getTypeSizeInBits(CmpIndVar->getType())
+ > SE->getTypeSizeInBits(CmpTy)) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+ }
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
- Cond->setDebugLoc(BI->getDebugLoc());
+ Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
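
The IVLimit arithmetic in this hunk relies on the EQ/NE exit test tolerating modular arithmetic: for a unit-stride counter, Limit = Start + TripCount is correct even when the sum wraps. A small 8-bit demonstration:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t Start = 250;
    uint8_t BECount = 9;                          // backedge taken 9 times
    uint8_t Limit = uint8_t(Start + BECount + 1); // 260 wraps to 4 mod 256

    unsigned Trips = 0;
    for (uint8_t I = Start; I != Limit; ++I)      // != test survives the wrap
      ++Trips;
    assert(Trips == 10);                          // BECount + 1 iterations
    return 0;
  }
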
@@ -1612,7 +1655,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
@@ -1633,6 +1676,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip landingpad instructions.
+ if (isa<LandingPadInst>(I))
+ continue;
+
// Don't sink static AllocaInsts out of the entry block, which would
// turn them into dynamic allocas!
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
@@ -1699,7 +1746,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
@@ -1717,6 +1764,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
// Eliminate redundant IV users.
//
@@ -1724,9 +1774,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (DisableIVRewrite) {
+ if (!EnableIVRewrite) {
Rewriter.disableCanonicalMode();
- SimplifyIVUsersNoRewrite(L, Rewriter);
+ SimplifyAndExtend(L, Rewriter, LPM);
}
// Check to see if this loop has a computable loop-invariant execution count.
@@ -1739,25 +1789,25 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- if (!DisableIVRewrite)
- SimplifyIVUsers(Rewriter);
+ if (EnableIVRewrite)
+ Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
// Eliminate redundant IV cycles.
- if (DisableIVRewrite)
- SimplifyCongruentIVs(L);
+ if (!EnableIVRewrite)
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
- const Type *LargestType = 0;
+ Type *LargestType = 0;
bool NeedCannIV = false;
bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (ExpandBECount) {
+ if (EnableIVRewrite && ExpandBECount) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
NeedCannIV = true;
- const Type *Ty = BackedgeTakenCount->getType();
- if (DisableIVRewrite) {
+ Type *Ty = BackedgeTakenCount->getType();
+ if (!EnableIVRewrite) {
// In this mode, SimplifyIVUsers may have already widened the IV used by
// the backedge test and inserted a Trunc on the compare's operand. Get
// the wider type to avoid creating a redundant narrow IV only used by the
@@ -1769,10 +1819,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE->getTypeSizeInBits(LargestType))
LargestType = SE->getEffectiveSCEVType(Ty);
}
- if (!DisableIVRewrite) {
+ if (EnableIVRewrite) {
for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
NeedCannIV = true;
- const Type *Ty =
+ Type *Ty =
SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
@@ -1811,18 +1861,16 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// the end of the pass.
while (!OldCannIVs.empty()) {
PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
}
}
-
+ else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
+ IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- ICmpInst *NewICmp = 0;
- if (ExpandBECount) {
- assert(canExpandBackedgeTakenCount(L, SE) &&
- "canonical IV disrupted BackedgeTaken expansion");
- assert(NeedCannIV &&
- "LinearFunctionTestReplace requires a canonical induction variable");
+ Value *NewICmp = 0;
+ if (ExpandBECount && IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
// pass that uses the SCEVExpander must do it. This does not work well for
@@ -1837,7 +1885,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
RewriteIVExpressions(L, Rewriter);
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -1860,12 +1908,34 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp && IU)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
-
+ if (IU && NewICmp) {
+ ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
+ if (NewICmpInst)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+ }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
- assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
+ assert(L->isLCSSAForm(*DT) &&
+ "Indvars did not leave the loop in lcssa form!");
+
+ // Verify that LFTR and any other changes have not interfered with SCEV's
+ // ability to compute trip count.
+#ifndef NDEBUG
+ if (!EnableIVRewrite && VerifyIndvars &&
+ !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ SE->forgetLoop(L);
+ const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
+ if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
+ SE->getTypeSizeInBits(NewBECount->getType()))
+ NewBECount = SE->getTruncateOrNoop(NewBECount,
+ BackedgeTakenCount->getType());
+ else
+ BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
+ NewBECount->getType());
+ assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
+ }
+#endif
+
return Changed;
}
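
The NDEBUG block above re-derives the backedge-taken count after the pass runs and compares it with the cached count at the narrower of the two widths. A minimal integer model of that check, with assumed bit widths standing in for SCEV types:

    #include <cassert>
    #include <cstdint>

    // Truncate both counts to the narrower width, then require equality,
    // mirroring the getTruncateOrNoop calls on the two SCEVs above.
    static void checkBECounts(uint64_t Old, unsigned OldBits,
                              uint64_t New, unsigned NewBits) {
      unsigned Bits = OldBits < NewBits ? OldBits : NewBits;
      uint64_t Mask = Bits >= 64 ? ~0ULL : (1ULL << Bits) - 1;
      assert((Old & Mask) == (New & Mask) && "indvars must preserve SCEV");
    }
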
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b500d5b..f410af3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -811,8 +811,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile loads.
- if (LI->isVolatile()) return false;
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
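
This hunk and several below replace isVolatile() checks with the stricter isSimple() or isUnordered() predicates that came in with the new atomics. A sketch of how the predicates relate, using a hypothetical Access type rather than the real LoadInst/StoreInst API:

    enum Ordering { NotAtomic, Unordered, Monotonic /* stronger orders elided */ };

    struct Access {
      bool Volatile;
      Ordering Ord;
      // Simple: neither volatile nor atomic; the strictest predicate.
      bool isSimple() const { return !Volatile && Ord == NotAtomic; }
      // Unordered: not volatile and at most Unordered atomicity, so a
      // superset of the simple accesses.
      bool isUnordered() const { return !Volatile && Ord <= Unordered; }
    };
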
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 66add6c..b79bb13 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -151,6 +151,11 @@ namespace {
///
bool isSafeToExecuteUnconditionally(Instruction &I);
+ /// isGuaranteedToExecute - Check that the instruction is guaranteed to
+ /// execute.
+ ///
+ bool isGuaranteedToExecute(Instruction &I);
+
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
@@ -357,8 +362,8 @@ void LICM::HoistRegion(DomTreeNode *N) {
bool LICM::canSinkOrHoistInst(Instruction &I) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->isVolatile())
- return false; // Don't hoist volatile loads!
+ if (!LI->isUnordered())
+ return false; // Don't hoist volatile/atomic loads!
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
@@ -461,7 +466,7 @@ void LICM::sink(Instruction &I) {
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+ I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
// This instruction is no longer in the AST for the current loop, because
// we just sunk it out of the loop. If we just sunk it into an outer
@@ -504,7 +509,7 @@ void LICM::sink(Instruction &I) {
continue;
// Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
@@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
if (Inst.isSafeToSpeculativelyExecute())
return true;
+ return isGuaranteedToExecute(Inst);
+}
+
+bool LICM::isGuaranteedToExecute(Instruction &Inst) {
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
@@ -635,7 +644,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -713,34 +722,41 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- unsigned InstAlignment;
if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
- assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- InstAlignment = load->getAlignment();
+ assert(!load->isVolatile() && "AST broken");
+ if (!load->isSimple())
+ return;
} else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (Use->getOperand(1) != ASIV)
continue;
- InstAlignment = store->getAlignment();
- assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ assert(!store->isVolatile() && "AST broken");
+ if (!store->isSimple())
+ return;
+
+ // Note that we only check GuaranteedToExecute inside the store case
+ // so that we do not introduce stores where they did not exist before
+ // (which would break the LLVM concurrency model).
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ unsigned InstAlignment = store->getAlignment();
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isGuaranteedToExecute(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isGuaranteedToExecute(*Use);
+
} else
return; // Not a load or store.
- // If the alignment of this instruction allows us to specify a more
- // restrictive (and performant) alignment and if we are sure this
- // instruction will be executed, update the alignment.
- // Larger is better, with the exception of 0 being the best alignment.
- if ((InstAlignment > Alignment || InstAlignment == 0)
- && (Alignment != 0))
- if (isSafeToExecuteUnconditionally(*Use)) {
- GuaranteedToExecute = true;
- Alignment = InstAlignment;
- }
-
- if (!GuaranteedToExecute)
- GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-
LoopUses.push_back(Use);
}
}
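
The alignment logic moved into the store case above keeps the largest alignment seen, treats 0 as unbeatable (it denotes the ABI/preferred alignment), and only upgrades when the store is guaranteed to execute. The same rule as a standalone helper, a model rather than the pass's interface:

    // Returns the promoted alignment: larger wins, 0 is never replaced, and
    // an upgrade requires a guaranteed-to-execute store.
    static unsigned mergeAlignment(unsigned Cur, unsigned Inst, bool Guaranteed) {
      if ((Inst > Cur || Inst == 0) && Cur != 0 && Guaranteed)
        return Inst;
      return Cur;
    }
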
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a0e41d9..ad15cbb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -267,7 +267,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -314,7 +314,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
return true;
}
@@ -463,7 +463,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
SplatValue = 0;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
- // do anything with a 3-byte store, for example.
+ // do anything with a 3-byte store.
return false;
}
@@ -498,7 +498,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -604,7 +604,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ Type *IntPtr = TD->getIntPtrType(SI->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
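
Both the memset and memcpy paths above size the library call as (backedge-taken count + 1) * store size, expanded in the pointer-width type. As plain arithmetic, with 64-bit integers standing in for the SCEV expressions:

    #include <cstdint>

    // The trip count is BECount + 1; each iteration stores StoreSize bytes.
    static uint64_t numBytesStored(uint64_t BECount, uint64_t StoreSize) {
      return (BECount + 1) * StoreSize;
    }
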
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 509d026..3e122c2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -70,12 +70,27 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
+namespace llvm {
+cl::opt<bool> EnableNested(
+ "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
+
+cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+
+// Temporary flag to clean up congruent phis after LSR phi expansion.
+// It's currently disabled until we can determine whether it's truly useful or
+// not. The flag should be removed after the v3.0 release.
+cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
+}
+
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
@@ -219,7 +234,7 @@ struct Formula {
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
- const Type *getType() const;
+ Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -319,7 +334,7 @@ unsigned Formula::getNumRegs() const {
/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
-const Type *Formula::getType() const {
+Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
AM.BaseGV ? AM.BaseGV->getType() :
@@ -397,7 +412,7 @@ void Formula::dump() const {
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
@@ -405,7 +420,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
@@ -413,7 +428,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
@@ -594,8 +609,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
}
/// getAccessType - Return the type of the memory being accessed.
-static const Type *getAccessType(const Instruction *Inst) {
- const Type *AccessTy = Inst->getType();
+static Type *getAccessType(const Instruction *Inst) {
+ Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -614,7 +629,7 @@ static const Type *getAccessType(const Instruction *Inst) {
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
@@ -670,6 +685,21 @@ public:
void Loose();
+#ifndef NDEBUG
+ // Once any of the metrics loses, they must all remain losers.
+ bool isValid() {
+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ | ImmCost | SetupCost) != ~0u)
+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ & ImmCost & SetupCost) == ~0u);
+ }
+#endif
+
+ bool isLoser() {
+ assert(isValid() && "invalid cost");
+ return NumRegs == ~0u;
+ }
+
void RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
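
The isValid() check above encodes the invariant that either no cost metric has lost or all of them have, using one OR-fold and one AND-fold. The same invariant over an explicit list, sketch only:

    #include <cstdint>
    #include <initializer_list>

    // OR-fold != ~0u: no metric is saturated. AND-fold == ~0u: all metrics
    // are. After any Loose() call, exactly the second case must hold.
    static bool costIsValid(std::initializer_list<uint32_t> Metrics) {
      uint32_t AllOr = 0, AllAnd = ~0u;
      for (uint32_t M : Metrics) { AllOr |= M; AllAnd &= M; }
      return AllOr != ~0u || AllAnd == ~0u;
    }
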
@@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg,
if (AR->getLoop() == L)
AddRecCost += 1; /// TODO: This should be a function of the stride.
- // If this is an addrec for a loop that's already been visited by LSR,
- // don't second-guess its addrec phi nodes. LSR isn't currently smart
- // enough to reason about more than one loop at a time. Consider these
- // registers free and leave them alone.
- else if (L->contains(AR->getLoop()) ||
+ // If this is an addrec for another loop, don't second-guess its addrec phi
+ // nodes. LSR isn't currently smart enough to reason about more than one
+ // loop at a time. LSR has either already run on inner loops or will not
+ // run on other loops, and cannot be expected to change sibling loops. If
+ // the AddRec exists, consider its register free and leave it alone.
+ // Otherwise, do not consider this formula at all.
+ // FIXME: why do we need to generate such formulae?
+ else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return;
-
+ }
+ if (!EnableNested) {
+ Loose();
+ return;
+ }
// If this isn't one of the addrecs that the loop already has, it
// would require a costly new phi and add. TODO: This isn't
// precisely modeled right now.
++NumBaseAdds;
- if (!Regs.count(AR->getStart()))
+ if (!Regs.count(AR->getStart())) {
RateRegister(AR->getStart(), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
}
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
- if (!Regs.count(AR->getStart()))
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
+ if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
+ }
}
++NumRegs;
@@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
@@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
// Determine how many (unfolded) adds we'll need inside the loop.
@@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F,
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
+ assert(isValid() && "invalid cost");
}
/// Loose - Set this cost to a losing value.
@@ -980,7 +1029,7 @@ public:
};
KindType Kind;
- const Type *AccessTy;
+ Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
@@ -995,7 +1044,7 @@ public:
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
- const Type *WidestFixupType;
+ Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
@@ -1005,7 +1054,7 @@ public:
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
@@ -1127,7 +1176,7 @@ void LSRUse::dump() const {
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
@@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
return false;
}
@@ -1176,7 +1225,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
static bool isLegalUse(TargetLowering::AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
@@ -1198,7 +1247,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM,
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1224,7 +1273,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
@@ -1299,7 +1348,7 @@ class LSRInstance {
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
- SmallSetVector<const Type *, 4> Types;
+ SmallSetVector<Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
@@ -1330,11 +1379,11 @@ class LSRInstance {
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy);
+ LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
- const Type *AccessTy);
+ Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -1426,7 +1475,8 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- const Type *DestTy = NULL;
+ Type *DestTy = NULL;
+ bool IsSigned = false;
/* If shadow use is an int->float cast then insert a second IV
to eliminate this cast.
@@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() {
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = false;
DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ }
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = true;
DestTy = SCast->getDestTy();
+ }
if (!DestTy) continue;
if (TLI) {
@@ -1457,7 +1511,7 @@ void LSRInstance::OptimizeShadowIV() {
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
- const Type *SrcTy = PH->getType();
+ Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
@@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
+ (double)Init->getSExtValue() :
+ (double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
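
The ConstantFP fix above matters because a negative initial value zero-extends to a huge unsigned number before the float conversion. A two-line model of the corrected behavior:

    #include <cstdint>

    // For signed IVs, reinterpret the bits as signed before converting;
    // otherwise a 64-bit -1 becomes 18446744073709551615.0 instead of -1.0.
    static double initAsDouble(uint64_t Bits, bool IsSigned) {
      return IsSigned ? (double)(int64_t)Bits : (double)Bits;
    }
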
@@ -1776,7 +1832,7 @@ LSRInstance::OptimizeLoopTermCond() {
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
- const Type *AccessTy = getAccessType(UI->getUser());
+ Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
@@ -1840,10 +1896,10 @@ LSRInstance::OptimizeLoopTermCond() {
/// return true.
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- const Type *NewAccessTy = AccessTy;
+ Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
@@ -1882,7 +1938,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
/// Either reuse an existing use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2044,7 +2100,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- const Type *AccessTy = 0;
+ Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2464,7 +2520,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
@@ -2538,7 +2594,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
@@ -2598,13 +2654,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
- const Type *DstTy = Base.getType();
+ Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
- const Type *SrcTy = *I;
+ Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -2741,7 +2797,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
- const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
@@ -3275,6 +3331,9 @@ retry:
skip:;
}
+ if (!EnableRetry && !AnySatisfiedReqRegs)
+ return;
+
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
@@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
// SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
+ if (Solution.empty()) {
+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ return;
+ }
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
@@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
+ // Ignore landingpad instructions.
+ while (isa<LandingPadInst>(IP)) ++IP;
+
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -3440,9 +3506,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
// This will be the type that we'll initially expand to.
- const Type *Ty = F.getType();
+ Type *Ty = F.getType();
if (!Ty)
// No type known; just expand directly to the ultimate type.
Ty = OpTy;
@@ -3450,7 +3516,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand directly to the ultimate type if it's the right size.
Ty = OpTy;
// This is the type to do integer arithmetic in.
- const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+ Type *IntTy = SE.getEffectiveSCEVType(Ty);
// Build up a list of operands to add together to form the full base.
SmallVector<const SCEV *, 8> Ops;
@@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
- ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
@@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
- Loop *PNLoop = LI.getLoopFor(PN->getParent());
- if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ BasicBlock *Parent = PN->getParent();
+ Loop *PNLoop = LI.getLoopFor(Parent);
+ if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+ BasicBlock *NewBB = 0;
+ if (!Parent->isLandingPad()) {
+ NewBB = SplitCriticalEdge(BB, Parent, P,
+ /*MergeIdenticalEdges=*/true,
+ /*DontDeleteUselessPhis=*/true);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ NewBB = NewBBs[0];
+ }
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
@@ -3637,7 +3713,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
@@ -3667,7 +3743,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
@@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
@@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
OptimizeShadowIV();
OptimizeLoopTermCond();
+ // If loop preparation eliminates all interesting IV users, bail.
+ if (IU.empty()) return;
+
+ // Skip nested loops until we can model them better with formulae.
+ if (!EnableNested && !L->empty()) {
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ return;
+ }
+
// Start collecting data and preparing for the solver.
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
@@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
Types.clear();
RegUses.clear();
+ if (Solution.empty())
+ return;
+
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
@@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -3793,7 +3898,7 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
OS << '*' << *I;
}
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fef6bc3..91395b2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Target/TargetData.h"
#include <climits>
using namespace llvm;
@@ -39,6 +40,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
+// Temporary flag to be removed in 3.0
+static cl::opt<bool>
+NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
+ cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -49,7 +55,7 @@ namespace {
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
-
+
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -57,11 +63,11 @@ namespace {
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-
+
// Threshold to use when optsize is specified (and there is no
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
-
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -79,6 +85,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
@@ -101,45 +108,62 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
}
/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ const TargetData *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I, TD);
NumCalls = Metrics.NumInlineCandidates;
-
+
unsigned LoopSize = Metrics.NumInsts;
-
+
// Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
// not a problem for code quality.
if (LoopSize == 0) LoopSize = 1;
-
+
return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
-
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
unsigned Threshold = CurrentThreshold;
- if (!UserThreshold &&
+ if (!UserThreshold &&
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = CurrentCount;
-
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ if (!NoSCEVUnroll) {
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ }
+ }
+ else {
+ TripCount = L->getSmallConstantTripCount();
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+ }
// Automatically select an unroll count.
+ unsigned Count = CurrentCount;
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -152,8 +176,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
unsigned NumInlineCandidates;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
@@ -182,12 +207,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
return false;
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
- DT->runOnFunction(*F);
return true;
}
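
The rewritten driver above distinguishes an exact latch trip count from a trip multiple used when the count is unknown. A rough model of why the multiple is useful (a sketch of the idea, not the pass's actual policy):

    // If the unroll factor divides the trip count (or, failing that, the
    // trip multiple), the exit test in every unrolled copy but the last can
    // never fire and may be dropped. Assumes Count >= 1.
    static bool exitTestsFoldable(unsigned Count, unsigned TripCount,
                                  unsigned TripMultiple) {
      if (TripCount != 0)
        return TripCount % Count == 0;
      return TripMultiple % Count == 0;
    }
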
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 840c4b6..458949c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -492,7 +492,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
Value *BranchVal = LIC;
if (!isa<ConstantInt>(Val) ||
Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
else if (Val != ConstantInt::getTrue(Val->getContext()))
// We want to enter the new loop when the condition is true.
std::swap(TrueDest, FalseDest);
@@ -561,10 +561,17 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
BasicBlock *ExitBlock = ExitBlocks[i];
SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
pred_end(ExitBlock));
+
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ if (!ExitBlock->isLandingPad()) {
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
+ this, NewBBs);
+ }
}
}
@@ -632,7 +639,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// as well.
ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
}
-
+
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
@@ -653,6 +660,19 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
+
+ if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
+ PN = PHINode::Create(LPad->getType(), 0, "",
+ ExitSucc->getFirstInsertionPt());
+
+ for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ LandingPadInst *LPI = BB->getLandingPadInst();
+ LPI->replaceAllUsesWith(PN);
+ PN->addIncoming(LPI, BB);
+ }
+ }
}
// Rewrite the code to refer to itself.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 9087b46..689bbe9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -20,98 +20,88 @@
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
-static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
- IRBuilder<> Builder(II->getParent(), II);
- unsigned IID = II->getIntrinsicID();
- switch (IID) {
- case Intrinsic::memory_barrier:
- break;
+static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI->getParent(), CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CXI->replaceAllUsesWith(Orig);
+ CXI->eraseFromParent();
+ return true;
+}
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin: {
- Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
+static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
- switch (IID) {
- default: assert(0 && "Unrecognized atomic modify operation");
- case Intrinsic::atomic_load_add:
- Res = Builder.CreateAdd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_sub:
- Res = Builder.CreateSub(Orig, Delta);
- break;
- case Intrinsic::atomic_load_and:
- Res = Builder.CreateAnd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
- break;
- case Intrinsic::atomic_load_or:
- Res = Builder.CreateOr(Orig, Delta);
- break;
- case Intrinsic::atomic_load_xor:
- Res = Builder.CreateXor(Orig, Delta);
- break;
- case Intrinsic::atomic_load_max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Orig, Delta);
- break;
- case Intrinsic::atomic_load_umax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_umin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Orig, Delta);
- break;
- }
- Builder.CreateStore(Res, Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
- II->replaceAllUsesWith(Orig);
+ switch (RMWI->getOperation()) {
+ default: llvm_unreachable("Unexpected RMW operation");
+ case AtomicRMWInst::Xchg:
+ Res = Val;
break;
- }
-
- case Intrinsic::atomic_swap: {
- Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Builder.CreateStore(Val, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Add:
+ Res = Builder.CreateAdd(Orig, Val);
break;
- }
-
- case Intrinsic::atomic_cmp_swap: {
- Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
- Value *Val = II->getArgOperand(2);
-
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
- Value *Res = Builder.CreateSelect(Equal, Val, Orig);
- Builder.CreateStore(Res, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Sub:
+ Res = Builder.CreateSub(Orig, Val);
+ break;
+ case AtomicRMWInst::And:
+ Res = Builder.CreateAnd(Orig, Val);
+ break;
+ case AtomicRMWInst::Nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
+ break;
+ case AtomicRMWInst::Or:
+ Res = Builder.CreateOr(Orig, Val);
+ break;
+ case AtomicRMWInst::Xor:
+ Res = Builder.CreateXor(Orig, Val);
+ break;
+ case AtomicRMWInst::Max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::Min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Orig, Val);
+ break;
+ case AtomicRMWInst::UMax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::UMin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Orig, Val);
break;
}
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
- default:
- return false;
- }
+static bool LowerFenceInst(FenceInst *FI) {
+ FI->eraseFromParent();
+ return true;
+}
- assert(II->use_empty() &&
- "Lowering should have eliminated any uses of the intrinsic call!");
- II->eraseFromParent();
+static bool LowerLoadInst(LoadInst *LI) {
+ LI->setAtomic(NotAtomic);
+ return true;
+}
+static bool LowerStoreInst(StoreInst *SI) {
+ SI->setAtomic(NotAtomic);
return true;
}
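
The rewritten pass lowers the first-class atomic instructions by stripping their atomicity, which is only sound on targets without threads. Semantically, the cmpxchg and rmw cases reduce to ordinary read-modify-write sequences, modeled here in plain C++:

    // Single-threaded models of the lowerings above.
    template <typename T>
    T lowerCmpXchg(T *Ptr, T Cmp, T Val) {
      T Orig = *Ptr;                     // CreateLoad
      *Ptr = (Orig == Cmp) ? Val : Orig; // CreateICmpEQ + CreateSelect + store
      return Orig;                       // cmpxchg yields the original value
    }

    template <typename T>
    T lowerRMWAdd(T *Ptr, T Val) {
      T Orig = *Ptr;
      *Ptr = Orig + Val;                 // the AtomicRMWInst::Add case
      return Orig;
    }
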
@@ -123,9 +113,22 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
- Changed |= LowerAtomicIntrinsic(II);
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ Changed |= LowerFenceInst(FI);
+ else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ Changed |= LowerAtomicCmpXchgInst(CXI);
+ else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+ Changed |= LowerAtomicRMWInst(RMWI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isAtomic())
+ LowerLoadInst(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isAtomic())
+ LowerStoreInst(SI);
+ }
+ }
return Changed;
}
};
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7ed3db6..eeb8931 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -54,7 +54,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
if (OpC->isZero()) continue; // No offset.
// Handle struct indices, which add their field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
@@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -448,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
- const Type *EltType =
+ Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
@@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
if (TD == 0) return false;
@@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse() &&
+ if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
@@ -616,7 +616,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!A->hasStructRetAttr())
return false;
- const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ Type *StructTy = cast<PointerType>(A->getType())->getElementType();
uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
@@ -860,7 +860,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
- const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
index ee132d3..da74e9c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -180,7 +180,7 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasStructRetAttr())
return false;
// Only consider values with pointer types, and not function pointers.
- const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
if (!Ty || isa<FunctionType>(Ty->getElementType()))
return false;
// Conservatively assume anything else is a potential use.
@@ -213,8 +213,8 @@ static InstructionClass GetFunctionClass(const Function *F) {
const Argument *A0 = AI++;
if (AI == AE)
// Argument is a pointer.
- if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- const Type *ETy = PTy->getElementType();
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
// Argument is i8*.
if (ETy->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
@@ -234,7 +234,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
.Default(IC_CallOrUser);
// Argument is i8**
- if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
if (Pte->getElementType()->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_loadWeakRetained", IC_LoadWeakRetained)
@@ -246,11 +246,11 @@ static InstructionClass GetFunctionClass(const Function *F) {
// Two arguments, first is i8**.
const Argument *A1 = AI++;
if (AI == AE)
- if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
if (Pte->getElementType()->isIntegerTy(8))
- if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- const Type *ETy1 = PTy1->getElementType();
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
// Second argument is i8*
if (ETy1->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
@@ -258,7 +258,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
.Case("objc_initWeak", IC_InitWeak)
.Default(IC_CallOrUser);
// Second argument is i8**.
- if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
if (Pte1->getElementType()->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_moveWeak", IC_MoveWeak)
@@ -344,6 +344,10 @@ static InstructionClass GetInstructionClass(const Value *V) {
break;
default:
// For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (IsPotentialUse(*OI))
@@ -421,9 +425,10 @@ static bool IsAlwaysTail(InstructionClass Class) {
/// IsNoThrow - Test if the given class represents instructions which are always
/// safe to mark with the nounwind attribute.
static bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
return Class == IC_Retain ||
Class == IC_RetainRV ||
- Class == IC_RetainBlock ||
Class == IC_Release ||
Class == IC_Autorelease ||
Class == IC_AutoreleaseRV ||
@@ -515,6 +520,10 @@ static bool IsObjCIdentifiedObject(const Value *V) {
const Value *Pointer =
StripPointerCastsAndObjCCalls(LI->getPointerOperand());
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
StringRef Name = GV->getName();
// These special variables are known to hold values which are not
// reference-counted pointers.
@@ -738,7 +747,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
switch (GetBasicInstructionClass(CS.getInstruction())) {
case IC_Retain:
case IC_RetainRV:
- case IC_RetainBlock:
case IC_Autorelease:
case IC_AutoreleaseRV:
case IC_NoopCast:
@@ -746,6 +754,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
return NoModRef;
default:
break;
@@ -877,7 +887,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
// usually can't sink them past other calls, which would be the main
// case where it would be useful.
-/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
#include "llvm/GlobalAlias.h"
#include "llvm/Constants.h"
@@ -1098,16 +1110,16 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
if (A == S_None || B == S_None)
return S_None;
- // Note that we can't merge S_CanRelease and S_Use.
if (A > B) std::swap(A, B);
if (TopDown) {
// Choose the side which is further along in the sequence.
- if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
return B;
} else {
// Choose the side which is further along in the sequence.
if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
return A;
// If both sides are releases, choose the more conservative one.
if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
@@ -1124,13 +1136,19 @@ namespace {
/// retain-decrement-use-release sequence or release-use-decrement-retain
/// reverse sequence.
struct RRInfo {
- /// KnownIncremented - After an objc_retain, the reference count of the
- /// referenced object is known to be positive. Similarly, before an
- /// objc_release, the reference count of the referenced object is known to
- /// be positive. If there are retain-release pairs in code regions where the
- /// retain count is known to be positive, they can be eliminated, regardless
- /// of any side effects between them.
- bool KnownIncremented;
+ /// KnownSafe - After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the same known pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
/// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
/// opposed to objc_retain calls).
@@ -1153,7 +1171,7 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false),
ReleaseMetadata(0) {}
void clear();
@@ -1161,7 +1179,7 @@ namespace {
}
void RRInfo::clear() {
- KnownIncremented = false;
+ KnownSafe = false;
IsRetainBlock = false;
IsTailCallRelease = false;
ReleaseMetadata = 0;
@@ -1176,6 +1194,9 @@ namespace {
/// RefCount - The known minimum number of reference count increments.
unsigned RefCount;
+ /// NestCount - The known minimum level of retain+release nesting.
+ unsigned NestCount;
+
/// Seq - The current position in the sequence.
Sequence Seq;
@@ -1184,7 +1205,11 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : RefCount(0), Seq(S_None) {}
+ PtrState() : RefCount(0), NestCount(0), Seq(S_None) {}
+
+ void SetAtLeastOneRefCount() {
+ if (RefCount == 0) RefCount = 1;
+ }
void IncrementRefCount() {
if (RefCount != UINT_MAX) ++RefCount;
@@ -1194,14 +1219,22 @@ namespace {
if (RefCount != 0) --RefCount;
}
- void ClearRefCount() {
- RefCount = 0;
- }
-
bool IsKnownIncremented() const {
return RefCount > 0;
}
+ void IncrementNestCount() {
+ if (NestCount != UINT_MAX) ++NestCount;
+ }
+
+ void DecrementNestCount() {
+ if (NestCount != 0) --NestCount;
+ }
+
+ bool IsKnownNested() const {
+ return NestCount > 0;
+ }
+
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
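RefCount and NestCount are both saturating counters: increments stop at UINT_MAX and decrements stop at zero, so "known to be at least N" facts can never wrap around into nonsense. A self-contained sketch of that discipline:

  #include <cassert>
  #include <climits>

  struct SatCounter {
    unsigned N;
    SatCounter() : N(0) {}
    void inc() { if (N != UINT_MAX) ++N; }   // saturate, never overflow
    void dec() { if (N != 0) --N; }          // floor at zero, never wrap
    bool known() const { return N > 0; }     // cf. IsKnownIncremented()
  };

  int main() {
    SatCounter C;
    C.dec();                  // already zero: stays zero
    assert(!C.known());
    C.inc();
    assert(C.known());
    C.N = UINT_MAX;
    C.inc();                  // saturates instead of overflowing
    assert(C.N == UINT_MAX);
  }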
@@ -1233,6 +1266,7 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
RefCount = std::min(RefCount, Other.RefCount);
+ NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1245,7 +1279,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
RRI.ReleaseMetadata = 0;
- RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
@@ -1316,7 +1350,7 @@ namespace {
}
void clearBottomUpPointers() {
- PerPtrTopDown.clear();
+ PerPtrBottomUp.clear();
}
void clearTopDownPointers() {
@@ -1334,6 +1368,12 @@ namespace {
unsigned GetAllPathCount() const {
return TopDownPathCount * BottomUpPathCount;
}
+
+ /// isVisitedTopDown - Test whether the block for this BBState has been
+ /// visited by the top-down portion of the algorithm.
+ bool isVisitedTopDown() const {
+ return TopDownPathCount != 0;
+ }
};
}
@@ -1364,7 +1404,7 @@ void BBState::MergePred(const BBState &Other) {
/*TopDown=*/true);
}
- // For each entry in our set, if the other set doens't have an entry with the
+ // For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
for (ptr_iterator MI = top_down_ptr_begin(),
ME = top_down_ptr_end(); MI != ME; ++MI)
@@ -1389,7 +1429,7 @@ void BBState::MergeSucc(const BBState &Other) {
/*TopDown=*/false);
}
- // For each entry in our set, if the other set doens't have an entry
+ // For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
for (ptr_iterator MI = bottom_up_ptr_begin(),
ME = bottom_up_ptr_end(); MI != ME; ++MI)
@@ -1406,15 +1446,11 @@ namespace {
/// Run - A flag indicating whether this optimization pass should run.
bool Run;
- /// RetainFunc, RelaseFunc - Declarations for objc_retain,
- /// objc_retainBlock, and objc_release.
- Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
-
/// RetainRVCallee, etc. - Declarations for ObjC runtime
/// functions, for use in creating calls to them. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *AutoreleaseCallee;
+ *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
/// UsedInThisFunction - Flags which determine whether each of the
/// interesting runtime functions is in fact used in the current function.
@@ -1428,6 +1464,7 @@ namespace {
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
Constant *getAutoreleaseCallee(Module *M);
void OptimizeRetainCall(Function &F, Instruction *Retain);
@@ -1452,11 +1489,13 @@ namespace {
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts);
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1501,7 +1540,7 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1518,7 +1557,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1561,6 +1600,23 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
return RetainCallee;
}
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainBlockCallee;
+}
+
Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
@@ -1904,12 +1960,19 @@ void
ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
// Check for a return of the pointer value.
const Value *Ptr = GetObjCArg(AutoreleaseRV);
- for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
- UI != UE; ++UI) {
- const User *I = *UI;
- if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
- return;
- }
+ SmallVector<const Value *, 2> Users;
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ if (isa<BitCastInst>(I))
+ Users.push_back(I);
+ }
+ } while (!Users.empty());
Changed = true;
++NumPeeps;
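The rewritten scan is a small worklist walk that treats bitcasts as transparent: each cast's users are inspected just like the original pointer's. The same pattern over a toy use graph (std containers stand in for LLVM's use lists):

  #include <cassert>
  #include <map>
  #include <string>
  #include <vector>

  // Toy def->user edges; nodes named "cast*" are transparent.
  typedef std::multimap<std::string, std::string> UseGraph;

  bool hasReturnUser(const UseGraph &Uses, const std::string &Root) {
    std::vector<std::string> Worklist(1, Root);
    while (!Worklist.empty()) {
      std::string V = Worklist.back();
      Worklist.pop_back();
      std::pair<UseGraph::const_iterator, UseGraph::const_iterator> R =
          Uses.equal_range(V);
      for (UseGraph::const_iterator I = R.first; I != R.second; ++I) {
        if (I->second == "ret") return true;       // direct hit
        if (I->second.compare(0, 4, "cast") == 0)  // look through the cast
          Worklist.push_back(I->second);
      }
    }
    return false;
  }

  int main() {
    UseGraph Uses;
    Uses.insert(std::make_pair("ptr", "cast1"));
    Uses.insert(std::make_pair("cast1", "ret"));   // reachable only via cast1
    assert(hasReturnUser(Uses, "ptr"));
  }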
@@ -1953,7 +2016,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0))) {
- const Type *Ty = CI->getArgOperand(0)->getType();
+ Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
CI);
@@ -1968,7 +2031,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0)) ||
isNullOrUndef(CI->getArgOperand(1))) {
- const Type *Ty = CI->getArgOperand(0)->getType();
+ Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
CI);
@@ -2090,7 +2153,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
++NumPartialNoops;
// Clone the call into each predecessor that has a non-null value.
CallInst *CInst = cast<CallInst>(Inst);
- const Type *ParamTy = CInst->getArgOperand(0)->getType();
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
@@ -2132,41 +2195,49 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
case S_None:
- case S_CanRelease:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_Use:
SomeSuccHasSame = true;
break;
case S_Stop:
case S_Release:
case S_MovableRelease:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
case S_CanRelease: {
const Value *Arg = I->first;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
- case S_None:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_CanRelease:
SomeSuccHasSame = true;
break;
@@ -2174,16 +2245,18 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Release:
case S_MovableRelease:
case S_Use:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
}
}
@@ -2207,6 +2280,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (Succ == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
if (I == BBStates.end())
continue;
MyStates.InitFromSucc(I->second);
@@ -2245,11 +2320,12 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
S.IncrementRefCount();
+ S.IncrementNestCount();
break;
}
case IC_RetainBlock:
@@ -2259,6 +2335,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ // An objc_retainBlock call with just a use still needs to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (Class == IC_RetainBlock && S.GetSeq() == S_Use)
+ S.SetSeq(S_CanRelease);
switch (S.GetSeq()) {
case S_Stop:
@@ -2281,7 +2364,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
- break;
+ continue;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -2305,26 +2388,22 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MI->second;
Sequence Seq = S.GetSeq();
- // Check for possible retains and releases.
+ // Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a retain (we're going bottom-up here).
S.DecrementRefCount();
-
- // Check for a release.
- if (!IsRetain(Class) && Class != IC_RetainBlock)
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
}
// Check for possible direct uses.
@@ -2332,14 +2411,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
S.SetSeq(S_Use);
} else if (Seq == S_Release &&
(Class == IC_User || Class == IC_CallOrUser)) {
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
}
break;
@@ -2378,14 +2457,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- if (I == BBStates.end())
+ assert(I != BBStates.end());
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
+ if (!I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- if (I != BBStates.end())
+ assert(I != BBStates.end());
+ if (I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2422,18 +2505,23 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
S.SetSeq(S_Retain);
S.RRI.clear();
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
}
+ S.SetAtLeastOneRefCount();
S.IncrementRefCount();
- break;
+ S.IncrementNestCount();
+ continue;
}
case IC_Release: {
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
S.DecrementRefCount();
+ S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2478,16 +2566,12 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
Sequence Seq = S.GetSeq();
// Check for possible releases.
- if (!IsRetain(Class) && Class != IC_RetainBlock &&
- CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a release.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
-
- // Check for a release.
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
// One call can't cause a transition from S_Retain to S_CanRelease
@@ -2511,8 +2595,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (CanUse(Inst, Ptr, PA, Class))
S.SetSeq(S_Use);
break;
- case S_Use:
case S_Retain:
+ // An objc_retainBlock call may be responsible for copying the block
+ // data from the stack to the heap. Model this by moving it straight
+ // from S_Retain to S_Use.
+ if (S.RRI.IsRetainBlock &&
+ CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ }
+ break;
+ case S_Use:
case S_None:
break;
case S_Stop:
@@ -2533,28 +2627,43 @@ ObjCARCOpt::Visit(Function &F,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases) {
- // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // Use reverse-postorder on the reverse CFG for bottom-up, because we
// magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release.
+ // call retain on a single pointer without doing a release. We can't use
+ // ReversePostOrderTraversal here because we want to walk up from each
+ // function exit point.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
+ SmallVector<BasicBlock *, 16> Order;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+ if (BB->getTerminator()->getNumSuccessors() == 0)
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ while (!Stack.empty()) {
+ pred_iterator End = pred_end(Stack.back().first);
+ while (Stack.back().second != End) {
+ BasicBlock *BB = *Stack.back().second++;
+ if (Visited.insert(BB))
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ Order.push_back(Stack.pop_back_val().first);
+ }
bool BottomUpNestingDetected = false;
- SmallVector<BasicBlock *, 8> PostOrder;
- for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ Order.rbegin(), E = Order.rend(); I != E; ++I) {
BasicBlock *BB = *I;
- PostOrder.push_back(BB);
-
BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
}
- // Iterate through the post-order in reverse order, achieving a
- // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
- // class here because it works by computing its own full postorder iteration,
- // recording the sequence, and playing it back in reverse. Since we're already
- // doing a full iteration above, we can just record the sequence manually and
- // avoid the cost of having ReversePostOrderTraversal compute it.
+ // Use regular reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
- RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
- TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+ typedef ReversePostOrderTraversal<Function *> RPOTType;
+ RPOTType RPOT(&F);
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+ }
return TopDownNestingDetected && BottomUpNestingDetected;
}
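The bottom-up order is built by hand: seed a DFS stack with every exit block (a block whose terminator has no successors), walk predecessor edges, and record blocks in postorder of the reverse CFG; iterating that list back-to-front then yields the reverse-postorder the comment describes. The same traversal over a toy CFG:

  #include <cassert>
  #include <cstddef>
  #include <utility>
  #include <vector>

  int main() {
    // Toy CFG 0 -> 1 -> 2; Preds[b] lists the predecessors of block b.
    std::vector<std::vector<int> > Preds(3);
    Preds[1].push_back(0);
    Preds[2].push_back(1);

    std::vector<bool> Visited(3, false);
    std::vector<std::pair<int, size_t> > Stack;  // (block, next pred index)
    std::vector<int> Order;

    Stack.push_back(std::make_pair(2, (size_t)0)); // block 2 is the only exit
    Visited[2] = true;
    while (!Stack.empty()) {
      int BB = Stack.back().first;
      if (Stack.back().second < Preds[BB].size()) {
        int P = Preds[BB][Stack.back().second++];
        if (!Visited[P]) {
          Visited[P] = true;
          Stack.push_back(std::make_pair(P, (size_t)0));
        }
      } else {
        Order.push_back(BB);
        Stack.pop_back();
      }
    }
    // Postorder on the reverse CFG: the entry block comes out last.
    assert(Order.size() == 3);
    assert(Order[0] == 0 && Order[1] == 1 && Order[2] == 2);
  }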
@@ -2565,12 +2674,10 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts) {
- const Type *ArgTy = Arg->getType();
- const Type *ParamTy =
- (RetainRVFunc ? RetainRVFunc :
- RetainFunc ? RetainFunc :
- RetainBlockFunc)->arg_begin()->getType();
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M) {
+ Type *ArgTy = Arg->getType();
+ Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2581,7 +2688,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
new BitCastInst(Arg, ParamTy, "", InsertPt);
CallInst *Call =
CallInst::Create(RetainsToMove.IsRetainBlock ?
- RetainBlockFunc : RetainFunc,
+ getRetainBlockCallee(M) : getRetainCallee(M),
MyArg, "", InsertPt);
Call->setDoesNotThrow();
if (!RetainsToMove.IsRetainBlock)
@@ -2598,8 +2705,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
// The invoke's return value isn't available in the unwind block,
// but our releases will never depend on it, because they must be
// paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
- InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
+ InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
} else {
// Insert code immediately after the last use.
InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
@@ -2609,7 +2716,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *InsertPt = *I;
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
Call->setMetadata(ImpreciseReleaseMDKind, M);
@@ -2640,7 +2748,8 @@ bool
ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
&BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M) {
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2649,21 +2758,36 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ) {
- Value *V = (I++)->first;
+ E = Retains.end(); I != E; ++I) {
+ Value *V = I->first;
if (!V) continue; // blotted
Instruction *Retain = cast<Instruction>(V);
Value *Arg = GetObjCArg(Retain);
- // If the object being released is in static or stack storage, we know it's
+ // If the object being released is in static storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+ bool KnownSafe = isa<Constant>(Arg);
+
+ // Same for stack storage, unless this is an objc_retainBlock call,
+ // which is responsible for copying the block data from the stack to
+ // the heap.
+ if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg))
+ KnownSafe = true;
+
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ if (GV->isConstant())
+ KnownSafe = true;
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements.
- bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+ bool KnownSafeTD = true, KnownSafeBU = true;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2683,7 +2807,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
- KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ KnownSafeTD &= NewRetainRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2739,7 +2863,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
- KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ KnownSafeBU &= NewReleaseRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewReleaseRRI.Calls.begin(),
LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2787,12 +2911,19 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented, we can safely delete the pair
- // regardless of what's between them.
- if (KnownIncrementedTD || KnownIncrementedBU) {
+ // If the pointer is known incremented or nested, we can safely delete the
+ // pair regardless of what's between them.
+ if (KnownSafeTD || KnownSafeBU) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
+ } else {
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
}
// Determine whether the original call points are balanced in the retain and
@@ -2803,18 +2934,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
goto next_retain;
- // Determine whether the new insertion points we computed preserve the
- // balance of retain and release calls through the program.
- // TODO: If the fully aggressive solution isn't valid, try to find a
- // less aggressive solution which is.
- if (NewDelta != 0)
- goto next_retain;
-
// Ok, everything checks out and we're all set. Let's move some code!
Changed = true;
AnyPairsCompletelyEliminated = NewCount == 0;
NumRRs += OldCount - NewCount;
- MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+ Retains, Releases, DeadInsts, M);
next_retain:
NewReleases.clear();
@@ -2993,7 +3118,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
// Transform.
- return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+ return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+ NestingDetected;
}
/// OptimizeReturns - Look for this pattern:
@@ -3072,7 +3198,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Check that there is nothing that can affect the reference
// count between the retain and the call.
- FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ // Note that Retain need not be in BB.
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
DependingInstructions, Visited, PA);
if (DependingInstructions.size() != 1)
goto next_block;
@@ -3117,12 +3244,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
ImpreciseReleaseMDKind =
M.getContext().getMDKindID("clang.imprecise_release");
- // Identify the declarations for objc_retain and friends.
- RetainFunc = M.getFunction("objc_retain");
- RetainBlockFunc = M.getFunction("objc_retainBlock");
- RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
- ReleaseFunc = M.getFunction("objc_release");
-
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers.
@@ -3132,6 +3253,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
+ RetainBlockCallee = 0;
AutoreleaseCallee = 0;
return false;
@@ -3294,7 +3416,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -3310,7 +3432,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -3377,7 +3499,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
void ObjCARCContract::ContractRelease(Instruction *Release,
inst_iterator &Iter) {
LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || Load->isVolatile()) return;
+ if (!Load || !Load->isSimple()) return;
// For now, require everything to be in one basic block.
BasicBlock *BB = Release->getParent();
@@ -3393,7 +3515,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
!(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
++I;
StoreInst *Store = dyn_cast<StoreInst>(I);
- if (!Store || Store->isVolatile()) return;
+ if (!Store || !Store->isSimple()) return;
if (Store->getPointerOperand() != Loc.Ptr) return;
Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
@@ -3411,8 +3533,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
++NumStoreStrongs;
LLVMContext &C = Release->getContext();
- const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- const Type *I8XX = PointerType::getUnqual(I8X);
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
Value *Args[] = { Load->getPointerOperand(), New };
if (Args[0]->getType() != I8XX)
@@ -3548,7 +3670,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
Changed = true;
Instruction *Replacement = Inst;
- const Type *UseTy = U.get()->getType();
+ Type *UseTy = U.get()->getType();
if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
// For PHI nodes, insert the bitcast in the predecessor block.
unsigned ValNo =
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index e6341ae..8f98a5b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -309,7 +309,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::swap(LHS, RHS);
bool Success = !I->swapOperands();
assert(Success && "swapOperands failed");
- Success = false;
+ (void)Success;
MadeChange = true;
} else if (RHSBO) {
// Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 083412e..196a847 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -156,7 +156,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
- SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
/// StructValueState - This maintains ValueState for values that have
@@ -241,7 +241,7 @@ public:
/// this method must be called.
void AddTrackedFunction(Function *F) {
// Add an entry, F -> undef.
- if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
MRVFunctionsTracked.insert(F);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
@@ -302,7 +302,7 @@ public:
/// markAnythingOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
void markAnythingOverdefined(Value *V) {
- if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ if (StructType *STy = dyn_cast<StructType>(V->getType()))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
markOverdefined(getStructValueState(V, i), V);
else
@@ -417,7 +417,7 @@ private:
else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
LV.markConstant(CS->getOperand(i)); // Constants are constant.
else if (isa<ConstantAggregateZero>(C)) {
- const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
LV.markConstant(Constant::getNullValue(FieldTy));
} else
LV.markOverdefined(); // Unknown sort of constant.
@@ -471,9 +471,9 @@ private:
/// UsersOfOverdefinedPHIs map for PN, remove them now.
void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
if (UsersOfOverdefinedPHIs.empty()) return;
- std::multimap<PHINode*, Instruction*>::iterator It, E;
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- while (It != E) {
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy It = Range.first, E = Range.second; It != E;) {
if (It->second == I)
UsersOfOverdefinedPHIs.erase(It++);
else
@@ -486,9 +486,9 @@ private:
/// (Duplicate entries do not break anything directly, but can lead to
/// exponential growth of the table in rare cases.)
void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
- std::multimap<PHINode*, Instruction*>::iterator J, E;
- tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- for (; J != E; ++J)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy J = Range.first, E = Range.second; J != E; ++J)
if (J->second == I)
return;
UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
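Both rewrites expand `tie(It, E)` into an explicit std::pair from equal_range; the removal loop above also shows the standard post-increment idiom for erasing from an associative container while iterating. A standalone demonstration of that idiom:

  #include <cassert>
  #include <map>

  int main() {
    std::multimap<int, int> M;
    M.insert(std::make_pair(1, 10));
    M.insert(std::make_pair(1, 20));
    M.insert(std::make_pair(1, 10));
    M.insert(std::make_pair(2, 30));

    typedef std::multimap<int, int>::iterator ItTy;
    std::pair<ItTy, ItTy> Range = M.equal_range(1);
    for (ItTy It = Range.first, E = Range.second; It != E;) {
      if (It->second == 10)
        M.erase(It++);   // advance first, then erase the old position
      else
        ++It;
    }
    assert(M.count(1) == 1 && M.count(2) == 1);
  }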
@@ -515,6 +515,7 @@ private:
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -528,8 +529,12 @@ private:
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
+ void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitFenceInst (FenceInst &I) { /*returns void*/ }
+ void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
@@ -577,6 +582,10 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (TI.getNumSuccessors() < 2) {
+ Succs[0] = true;
+ return;
+ }
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -637,6 +646,9 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2)
+ return true;
+
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -692,13 +704,14 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// There may be instructions using this PHI node that are not overdefined
// themselves. If so, make sure that they know that the PHI node operand
// changed.
- std::multimap<PHINode*, Instruction*>::iterator I, E;
- tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
- if (I == E)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN);
+
+ if (Range.first == Range.second)
return;
SmallVector<Instruction*, 16> Users;
- for (; I != E; ++I)
+ for (ItTy I = Range.first, E = Range.second; I != E; ++I)
Users.push_back(I->second);
while (!Users.empty())
visit(Users.pop_back_val());
@@ -772,7 +785,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
// Handle functions that return multiple values.
if (!TrackedMultipleRetVals.empty()) {
- if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
if (MRVFunctionsTracked.count(F))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
@@ -825,7 +838,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
- const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ StructType *STy = dyn_cast<StructType>(IVI.getType());
if (STy == 0)
return markOverdefined(&IVI);
@@ -925,7 +938,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Could annihilate value.
if (I.getOpcode() == Instruction::And)
markConstant(IV, &I, Constant::getNullValue(I.getType()));
- else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ else if (VectorType *PT = dyn_cast<VectorType>(I.getType()))
markConstant(IV, &I, Constant::getAllOnesValue(PT));
else
markConstant(IV, &I,
@@ -1179,8 +1192,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *Ptr = Operands[0];
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
- Operands.size()-1));
+ ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end());
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1278,7 +1291,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ if (Constant *C = ConstantFoldCall(F, Operands))
return markConstant(I, C);
}
@@ -1303,7 +1316,7 @@ CallOverdefined:
continue;
}
- if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
mergeInValue(getStructValueState(AI, i), AI, CallArg);
@@ -1315,7 +1328,7 @@ CallOverdefined:
}
// If this is a single/zero retval case, see if we're tracking the function.
- if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
if (!MRVFunctionsTracked.count(F))
goto CallOverdefined; // Not tracking this callee.
@@ -1419,67 +1432,116 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Look for instructions which produce undef values.
if (I->getType()->isVoidTy()) continue;
- if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
- // Only a few things that can be structs matter for undef. Just send
- // all their results to overdefined. We could be more precise than this
- // but it isn't worth bothering.
- if (isa<CallInst>(I) || isa<SelectInst>(I)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
- if (LV.isUndefined())
- markOverdefined(LV, I);
- }
+ if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in ResolvedUndefsIn.
+ if (CallSite CS = CallSite(I))
+ if (Function *F = CS.getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
}
continue;
}
-
+
LatticeVal &LV = getValueState(I);
if (!LV.isUndefined()) continue;
- // No instructions using structs need disambiguation.
- if (I->getOperand(0)->getType()->isStructTy())
+ // extractvalue is safe; check here because the argument is a struct.
+ if (isa<ExtractValueInst>(I))
continue;
- // Get the lattice values of the first two operands for use below.
+ // Compute the operand LatticeVals, for convenience below.
+ // Anything taking a struct is conservatively assumed to require
+ // overdefined markings.
+ if (I->getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
LatticeVal Op0LV = getValueState(I->getOperand(0));
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
- // No instructions using structs need disambiguation.
- if (I->getOperand(1)->getType()->isStructTy())
- continue;
-
- // If this is a two-operand instruction, and if both operands are
- // undefs, the result stays undef.
+ if (I->getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
+
Op1LV = getValueState(I->getOperand(1));
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
- continue;
}
-
    // If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
- const Type *ITy = I->getType();
+ Type *ITy = I->getType();
switch (I->getOpcode()) {
- default: break; // Leave the instruction as an undef.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ break; // Any undef -> undef
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // Floating-point binary operation: be conservative.
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ else
+ markOverdefined(I);
+ return true;
case Instruction::ZExt:
- // After a zero extend, we know the top part is zero. SExt doesn't have
- // to be handled here, because we don't know whether the top part is 1's
- // or 0's.
- case Instruction::SIToFP: // some FP values are not possible, just use 0.
- case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ // undef -> 0; some outputs are impossible
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef | X -> -1. X could be -1.
markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
+ case Instruction::Xor:
+      // undef ^ undef -> 0; strictly speaking, this is not necessary, but
+      // we try to be nice to people who expect this behavior in simple
+      // cases
+ if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ }
+ // undef ^ X -> undef
+ break;
+
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::SRem:
@@ -1494,26 +1556,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
case Instruction::AShr:
- // undef >>s X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >>s undef -> X. X could be 0, X could have the high-bit known set.
- if (Op0LV.isConstant())
- markForcedConstant(I, Op0LV.getConstant());
- else
- markOverdefined(I);
+ // X >>a undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef >>a X -> all ones
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
- // undef >> X -> undef. No change.
- // undef << X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >> undef -> 0. X could be 0.
- // X << undef -> 0. X could be 0.
+ // X << undef -> undef.
+ // X >> undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef << X -> 0
+ // undef >> X -> 0
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
+ Op1LV = getValueState(I->getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
@@ -1533,9 +1593,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
else
markOverdefined(I);
return true;
+ case Instruction::Load:
+      // A load here means one of two things: a load of undef from a global,
+      // or a load from an unknown pointer.  Either way, having it return
+      // undef is okay.
+ break;
+ case Instruction::ICmp:
+ // X == undef -> undef. Other comparisons get more complicated.
+ if (cast<ICmpInst>(I)->isEquality())
+ break;
+ markOverdefined(I);
+ return true;
case Instruction::Call:
- // If a call has an undef result, it is because it is constant foldable
- // but one of the inputs was undef. Just force the result to
+ case Instruction::Invoke: {
+      // There are two reasons a call can have an undef result:
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in ResolvedUndefsIn.
+ if (Function *F = CallSite(I).getCalledFunction())
+ if (TrackedRetVals.count(F))
+ break;
+
+ // If the call is constant-foldable, we mark it overdefined because
+ // we do not know what return values are valid.
+ markOverdefined(I);
+ return true;
+ }
+ default:
+ // If we don't know what should happen here, conservatively mark it
// overdefined.
markOverdefined(I);
return true;
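Each forced constant above is chosen so that some concrete value of the undef operand produces it no matter what the other operand is: picking undef as 0 justifies `undef & X -> 0` and `undef * X -> 0`, picking undef as -1 justifies `undef | X -> -1` and `undef >>a X -> all ones`. A standalone spot check, assuming two's-complement arithmetic right shift of signed values (what mainstream compilers implement):

  #include <cassert>

  int main() {
    const long long Samples[] = { 0, 7, -1 };
    for (int i = 0; i != 3; ++i) {
      long long X = Samples[i];
      assert((0 & X) == 0);    // undef & X: choose undef == 0
      assert((0 * X) == 0);    // undef * X: choose undef == 0
      assert((-1 | X) == -1);  // undef | X: choose undef == -1
    }
    for (int Sh = 0; Sh != 63; ++Sh)
      assert((-1LL >> Sh) == -1LL); // undef >>a X: choose undef == all ones
  }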
@@ -1621,15 +1707,25 @@ FunctionPass *llvm::createSCCPPass() {
static void DeleteInstructionInBlock(BasicBlock *BB) {
DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumDeadBlocks;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- while (!isa<TerminatorInst>(BB->begin())) {
- Instruction *I = --BasicBlock::iterator(BB->getTerminator());
-
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- BB->getInstList().erase(I);
+
+  // Check to see if there are non-terminator instructions to delete.
+ if (isa<TerminatorInst>(BB->begin()))
+ return;
+
+  // Delete the instructions backwards, as this has a reduced likelihood of
+  // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ BB->getInstList().erase(Inst);
++NumInstRemoved;
}
}
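The new loop deletes backwards from the terminator but cannot remove a landingpad, since its block is still an invoke target; the landingpad instead becomes the new end fence and deletion continues with the instructions before it. The same shape on a toy instruction list:

  #include <cassert>
  #include <list>
  #include <string>

  int main() {
    std::list<std::string> BB;
    BB.push_back("nop");
    BB.push_back("landingpad");
    BB.push_back("add");
    BB.push_back("br");                   // the terminator

    std::list<std::string>::iterator End = BB.end();
    --End;                                // the terminator survives
    while (End != BB.begin()) {
      std::list<std::string>::iterator I = End;
      --I;                                // next-to-last remaining entry
      if (*I == "landingpad") {           // must keep: move the fence instead
        End = I;
        continue;
      }
      BB.erase(I);
    }
    assert(BB.size() == 2);
    assert(BB.front() == "landingpad" && BB.back() == "br");
  }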
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 302c287..f6918de 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -63,7 +63,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeCFGSimplifyPassPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
- initializeTailDupPass(Registry);
initializeTailCallElimPass(Registry);
}
@@ -187,3 +186,7 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBasicAliasAnalysisPass());
}
+
+void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerExpectIntrinsicPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7d6349c..c6d9123 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -129,11 +129,11 @@ namespace {
AllocaInfo &Info);
void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -145,6 +145,9 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
@@ -253,7 +256,7 @@ class ConvertToScalarInfo {
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
- const VectorType *VectorTy;
+ VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
@@ -269,11 +272,11 @@ public:
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
- bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
+ void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
+ bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
- Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
@@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (ScalarKind == Unknown)
ScalarKind = Integer;
- // FIXME: It should be possible to promote the vector type up to the alloca's
- // size.
if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
ScalarKind = Integer;
@@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- const Type *NewTy;
+ Type *NewTy;
if (ScalarKind == Vector) {
assert(VectorTy && "Missing type for vector scalar.");
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
@@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
-/// There are three cases we handle here:
+/// There are two cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
-/// <2 x float> and <4 x float>. Here we turn element accesses into insert
-/// and extract element operations, and <2 x float> accesses into a cast to
-/// <2 x double>, an extract, and a cast back to <2 x float>.
-/// 3) A fully general blob of memory, which we turn into some (potentially
+/// 2) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
uint64_t Offset) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
@@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
- if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
if (MergeInVectorType(VInTy, Offset))
return;
} else if (In->isFloatTy() || In->isDoubleTy() ||
@@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ (!VectorTy || EltSize == VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8)) {
if (!VectorTy) {
ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
}
-
- unsigned CurrentEltSize = VectorTy->getElementType()
- ->getPrimitiveSizeInBits()/8;
- if (EltSize == CurrentEltSize)
- return;
-
- if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
- return;
+ return;
}
}
@@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
-bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
uint64_t Offset) {
- // TODO: Support nonzero offsets?
- if (Offset != 0)
- return false;
-
- // Only allow vectors that are a power-of-2 away from the size of the alloca.
- if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
- return false;
-
- // If this the first vector we see, remember the type so that we know the
- // element size.
- if (!VectorTy) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access, ignore it
+ // even if it is a differing type but the same size. Worst case we can
+ // bitcast the resultant vectors.
+ if (!VectorTy)
+ VectorTy = VInTy;
ScalarKind = Vector;
- VectorTy = VInTy;
return true;
}
- unsigned BitWidth = VectorTy->getBitWidth();
- unsigned InBitWidth = VInTy->getBitWidth();
-
- // Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
- ScalarKind = Vector;
- return true;
- }
-
- const Type *ElementTy = VectorTy->getElementType();
- const Type *InElementTy = VInTy->getElementType();
-
- // Do not allow mixed integer and floating-point accesses from vectors of
- // different sizes.
- if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
- return false;
-
- if (ElementTy->isFloatingPointTy()) {
- // Only allow floating-point vectors of different sizes if they have the
- // same element type.
- // TODO: This could be loosened a bit, but would anything benefit?
- if (ElementTy != InElementTy)
- return false;
-
- // There are no arbitrary-precision floating-point types, which limits the
- // number of legal vector types with larger element types that we can form
- // to bitcast and extract a subvector.
- // TODO: We could support some more cases with mixed fp128 and double here.
- if (!(BitWidth == 64 || BitWidth == 128) ||
- !(InBitWidth == 64 || InBitWidth == 128))
- return false;
- } else {
- assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
- "or floating-point.");
- unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
- unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
-
- // Do not allow integer types smaller than a byte or types whose widths are
- // not a multiple of a byte.
- if (BitWidth < 8 || InBitWidth < 8 ||
- BitWidth % 8 != 0 || InBitWidth % 8 != 0)
- return false;
- }
-
- // Pick the largest of the two vector types.
- ScalarKind = Vector;
- if (InBitWidth > BitWidth)
- VectorTy = VInTy;
-
- return true;
+ return false;
}
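
MergeInVectorType now accepts only whole-alloca, zero-offset vector accesses; a later access of a different vector type with the same size is tolerated because a size-preserving bitcast loses no information. A standalone C++20 sketch of that invariant (std::bit_cast plays the role of LLVM's bitcast here):

    #include <array>
    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::array<float, 4> F = {1.0f, 2.0f, 3.0f, 4.0f};  // a "<4 x float>"
      auto I = std::bit_cast<std::array<uint32_t, 4>>(F); // view as "<4 x i32>"
      auto G = std::bit_cast<std::array<float, 4>>(I);    // round-trips losslessly
      std::printf("%g %g\n", G[0], G[3]);                 // prints: 1 4
      return 0;
    }
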
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile loads.
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
@@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
@@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
}
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
return false;
continue;
@@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
@@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
continue;
}
+ // If this is a lifetime intrinsic, we can handle it.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ continue;
+ }
+ }
+
// Otherwise, we cannot handle this!
return false;
}
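
Several hunks in this file replace an isVolatile() guard with !isSimple(). The effect is strictly more conservative: isSimple() is false for atomic accesses as well as volatile ones. A tiny sketch of the relationship, with an illustrative flags struct standing in for the real LoadInst/StoreInst API:

    // Illustrative only; LLVM's isSimple() is defined on the instruction.
    struct MemAccessFlags {
      bool Volatile;
      bool Atomic;
    };

    static bool isSimple(const MemAccessFlags &A) {
      return !A.Volatile && !A.Atomic; // only plain loads/stores qualify
    }

    // Old guard: reject if Volatile.
    // New guard: reject if !isSimple(), which also rejects atomics.
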
@@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
GEP->eraseFromParent();
continue;
@@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
LI->replaceAllUsesWith(NewLoadVal);
@@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
SPTy->getAddressSpace());
@@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
DPTy->getAddressSpace());
@@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
continue;
}
- llvm_unreachable("Unsupported operation!");
- }
-}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // There's no need to preserve these, as the resulting alloca will be
+ // converted to a register anyway.
+ II->eraseFromParent();
+ continue;
+ }
+ }
-/// getScaledElementType - Gets a scaled element type for a partial vector
-/// access of an alloca. The input types must be integer or floating-point
-/// scalar or vector types, and the resulting type is an integer, float or
-/// double.
-static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
- unsigned NewBitWidth) {
- bool IsFP1 = Ty1->isFloatingPointTy() ||
- (Ty1->isVectorTy() &&
- cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
- bool IsFP2 = Ty2->isFloatingPointTy() ||
- (Ty2->isVectorTy() &&
- cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
-
- LLVMContext &Context = Ty1->getContext();
-
- // Prefer floating-point types over integer types, as integer types may have
- // been created by earlier scalar replacement.
- if (IsFP1 || IsFP2) {
- if (NewBitWidth == 32)
- return Type::getFloatTy(Context);
- if (NewBitWidth == 64)
- return Type::getDoubleTy(Context);
+ llvm_unreachable("Unsupported operation!");
}
-
- return Type::getIntNTy(Context, NewBitWidth);
-}
-
-/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
-/// to another vector of the same element type which has the same allocation
-/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
-static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
- IRBuilder<> &Builder) {
- const Type *FromType = FromVal->getType();
- const VectorType *FromVTy = cast<VectorType>(FromType);
- const VectorType *ToVTy = cast<VectorType>(ToType);
- assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
- "Vectors must have the same element type");
- Value *UnV = UndefValue::get(FromType);
- unsigned numEltsFrom = FromVTy->getNumElements();
- unsigned numEltsTo = ToVTy->getNumElements();
-
- SmallVector<Constant*, 3> Args;
- const Type* Int32Ty = Builder.getInt32Ty();
- unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
- unsigned i;
- for (i=0; i != minNumElts; ++i)
- Args.push_back(ConstantInt::get(Int32Ty, i));
-
- if (i < numEltsTo) {
- Constant* UnC = UndefValue::get(Int32Ty);
- for (; i != numEltsTo; ++i)
- Args.push_back(UnC);
- }
- Constant *Mask = ConstantVector::get(Args);
- return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
@@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
-ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- const Type *FromType = FromVal->getType();
+ Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
- if (FromTypeSize == ToTypeSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (FromType->getPrimitiveSizeInBits() ==
- ToType->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
- else
- return CreateShuffleVectorCast(FromVal, ToType, Builder);
- }
-
- if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
- assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
- "of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(FromType, ToType,
- ToTypeSize * 8);
- unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
-
- LLVMContext &Context = FromVal->getContext();
- const Type *CastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Extract, ToType, "tmp");
- }
+ if (FromTypeSize == ToTypeSize)
+ return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
- Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType, "tmp");
+ V = Builder.CreateBitCast(V, ToType);
return V;
}
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
- if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ if (StructType *ST = dyn_cast<StructType>(ToType)) {
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
// Otherwise, this must be a union that was converted to an integer value.
- const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+ IntegerType *NTy = cast<IntegerType>(FromVal->getType());
// If this is a big-endian system and the load is narrower than the
// full alloca type, we need to do a shift to get the right bits.
@@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(),
- ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), ShAmt));
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(),
- -ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
// If the result is an integer, this is a trunc or bitcast.
if (ToType->isIntegerTy()) {
// Should be done.
} else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateBitCast(FromVal, ToType);
} else {
// Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType);
}
assert(FromVal->getType() == ToType && "Didn't convert right?");
return FromVal;
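
For the integer-blob case, the extraction above is a right shift by the field's bit offset, a truncation to the field width, and a final bitcast. A standalone model of the little-endian branch with plain integer arithmetic (C++20 for std::bit_cast):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // A { float, float } packed into one i64 blob; field 1 sits at bit 32.
      uint64_t Blob = (uint64_t)std::bit_cast<uint32_t>(2.5f) << 32
                    | std::bit_cast<uint32_t>(1.5f);
      uint32_t Bits = (uint32_t)(Blob >> 32); // CreateLShr + CreateTrunc
      float F = std::bit_cast<float>(Bits);   // CreateBitCast
      std::printf("%g\n", F);                 // prints: 2.5
      return 0;
    }
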
@@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t Offset, IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert it,
// then 'or' it into place.
- const Type *AllocaType = Old->getType();
+ Type *AllocaType = Old->getType();
LLVMContext &Context = Old->getContext();
- if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (VTy->getPrimitiveSizeInBits() ==
- SV->getType()->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
- else
- return CreateShuffleVectorCast(SV, VTy, Builder);
- }
-
- if (isPowerOf2_64(VecSize / ValSize)) {
- assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
- "value of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
- ValSize);
- unsigned NumCastVectorElements = VecSize / ValSize;
-
- LLVMContext &Context = SV->getContext();
- const Type *OldCastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
-
- Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Insert =
- Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Insert, AllocaType, "tmp");
- }
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
assert(SV->getType() == VTy->getElementType());
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV,
- ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
- "tmp");
+ return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
// If SV is a first-class aggregate value, insert each value recursively.
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
@@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
return Old;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
}
return Old;
@@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV,
- IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
if (SV->getType()->getPrimitiveSizeInBits() <
AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ SV = Builder.CreateZExt(SV, AllocaType);
else {
// Truncation may be needed if storing more than the alloca can hold
// (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SV = Builder.CreateTrunc(SV, AllocaType);
SrcWidth = DestWidth;
SrcStoreWidth = DestStoreWidth;
}
@@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
- ShAmt), "tmp");
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
- -ShAmt), "tmp");
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
Mask = Mask.lshr(-ShAmt);
}
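
The insert path is the mirror image of the extract path: widen the stored value, shift it into position, clear the destination bits with the mask maintained above, and 'or' the new bits in. A standalone model under the same little-endian assumption:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Old = 0x3FC00000ull;                 // field 0 = 1.5f, field 1 = 0
      uint64_t SV  = std::bit_cast<uint32_t>(4.0f); // value to store (32 bits)
      unsigned ShAmt = 32;                          // bit offset of field 1
      uint64_t Mask  = 0xFFFFFFFFull << ShAmt;      // bits owned by the field
      uint64_t New   = (Old & ~Mask) | (SV << ShAmt);
      std::printf("%g\n", std::bit_cast<float>((uint32_t)(New >> 32))); // 4
      return 0;
    }
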
@@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
// trapping load in the predecessor if it is a critical edge.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the terminator of the predecessor has side-effects (an invoke),
+ // there is no safe place to put a load in the predecessor.
+ if (Pred->getTerminator()->mayHaveSideEffects())
+ return false;
+
+ // If the value is produced by the terminator of the predecessor
+ // (an invoke), there is no valid place to put a load in the predecessor.
+ if (Pred->getTerminator() == InVal)
+ return false;
// If the predecessor has a single successor, then the edge isn't critical.
if (Pred->getTerminator()->getNumSuccessors() == 1)
continue;
-
- Value *InVal = PN->getIncomingValue(i);
-
- // If the InVal is an invoke in the pred, we can't put a load on the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == Pred)
- return false;
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
@@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
UI != UE; ++UI) {
User *U = *UI;
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI || SI->isVolatile())
+ if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
continue;
}
@@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (onlyUsedByLifetimeMarkers(BCI)) {
+ InstsToRewrite.insert(BCI);
+ continue;
+ }
+ }
+
return false;
}
@@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
+ // This can only be a bitcast whose sole users are lifetime intrinsics.
+ for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
+ I != E;) {
+ Use &U = I.getUse();
+ ++I;
+ cast<Instruction>(U.getUser())->eraseFromParent();
+ }
+ BCI->eraseFromParent();
+ continue;
+ }
+
if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
@@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
- const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
@@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) {
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
- const Type *T = AI->getAllocatedType();
+ Type *T = AI->getAllocatedType();
// Do not promote any struct into more than 32 separate vars.
- if (const StructType *ST = dyn_cast<StructType>(T))
+ if (StructType *ST = dyn_cast<StructType>(T))
return ST->getNumElements() <= 32;
// Arrays are much less likely to be safe for SROA; only consider
// them if they are very small.
- if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
return AT->getNumElements() <= 8;
return false;
}
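
The thresholds above are simple heuristics: structs may be split into at most 32 scalars, arrays into at most 8. A one-function sketch, with an explicit flag in place of the Type hierarchy:

    // Illustrative filter only; the real code inspects StructType/ArrayType.
    static bool shouldAttemptScalarRepl(bool IsStruct, unsigned NumElts) {
      return IsStruct ? NumElts <= 32  // structs: up to 32 separate vars
                      : NumElts <= 8;  // arrays: far less likely to profit
    }
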
@@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
@@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
WorkList.push_back(NA); // Add to worklist for recursive processing
}
} else {
- const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
ElementAllocas.reserve(AT->getNumElements());
- const Type *ElTy = AT->getElementType();
+ Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
@@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return MarkUnsafe(Info, User);
} else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
@@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
@@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
MarkUnsafe(Info, GEPI);
}
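
getIndexedOffset folds the GEP's index list into a byte offset using the layout of the indexed type. A standalone model for a two-index GEP into an array of two-field structs, with an assumed 8-byte struct size and 4-byte second-field offset:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Models: getelementptr {i32, i32}* %base, i64 2, i32 1
      uint64_t StructSize = 8, FieldOffset1 = 4; // assumed target layout
      uint64_t Offset = 2 * StructSize + FieldOffset1;
      std::printf("byte offset = %llu\n",
                  (unsigned long long)Offset);   // prints: 12
      return 0;
    }
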
@@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
/// elements of the same type (which is always true for arrays). If so,
/// return true with NumElts and EltTy set to the number of elements and the
/// element type, respectively.
-static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
- const Type *&EltTy) {
- if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
+ Type *&EltTy) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
EltTy = (NumElts == 0 ? 0 : AT->getElementType());
return true;
}
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
@@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
/// "homogeneous" aggregates with the same element type and number of elements.
-static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+static bool isCompatibleAggregate(Type *T1, Type *T2) {
if (T1 == T2)
return true;
unsigned NumElts1, NumElts2;
- const Type *EltTy1, *EltTy2;
+ Type *EltTy1, *EltTy2;
if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
NumElts1 == NumElts2 &&
@@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
/// unit. If false, it only allows accesses known to be in a single element.
void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore,
+ Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
@@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = Info.AI->getAllocatedType();
+ Type *T = Info.AI->getAllocatedType();
if (TypeHasComponent(T, Offset, MemSize)) {
Info.hasSubelementAccess = true;
return;
@@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
- const Type *EltTy;
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+ Type *EltTy;
uint64_t EltSize;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
EltSize = TD->getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
EltSize = TD->getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
@@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// address operand will be updated, so nothing else needs to be done.
continue;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
+ }
+ continue;
+ }
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
@@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
- const Type *SIType = Val->getType();
+ Type *SIType = Val->getType();
if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
@@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy) {
uint64_t Idx = 0;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
@@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
IdxTy = Type::getInt32Ty(T->getContext());
return Idx;
}
- const ArrayType *AT = cast<ArrayType>(T);
+ ArrayType *AT = cast<ArrayType>(T);
T = AT->getElementType();
uint64_t EltSize = TD->getTypeAllocSize(T);
Idx = Offset / EltSize;
@@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
- const Type *T = AI->getAllocatedType();
- const Type *IdxTy;
+ Type *T = AI->getAllocatedType();
+ Type *IdxTy;
uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
@@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
if (Idx == OldIdx)
return;
- const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ Type *i32Ty = Type::getInt32Ty(AI->getContext());
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
@@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
}
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
- Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
Val->takeName(GEPI);
}
if (Val->getType() != GEPI->getType())
@@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
DeadInsts.push_back(GEPI);
}
+/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
+/// to mark the lifetime of the scalarized memory.
+void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
+ // Put matching lifetime markers on everything from Offset up to
+ // Offset+OldSize.
+ Type *AIType = AI->getAllocatedType();
+ uint64_t NewOffset = Offset;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+
+ IRBuilder<> Builder(II);
+ uint64_t Size = OldSize->getLimitedValue();
+
+ if (NewOffset) {
+ // Handle a partial first element: index 'NewOffset' bytes into it.
+ // SROA will split the alloca again later.
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
+ ++Idx;
+ }
+
+ for (; Idx != NewElts.size() && Size; ++Idx) {
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ }
+ DeadInsts.push_back(II);
+}
+
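
The splitting loop above walks the scalarized elements, carving the original lifetime range into one marker per element until the remaining size is exhausted. A standalone model of just that bookkeeping, with assumed element sizes:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> EltSizes = {4, 8, 4}; // scalarized element sizes
      uint64_t Size = 12;                         // lifetime range in bytes
      for (size_t Idx = 0; Idx < EltSizes.size() && Size; ++Idx) {
        uint64_t EltSize = EltSizes[Idx];
        if (EltSize > Size) { EltSize = Size; Size = 0; }
        else                { Size -= EltSize; }
        std::printf("marker on element %zu covers %llu bytes\n",
                    Idx, (unsigned long long)EltSize);
      }
      return 0; // element 0: 4 bytes, element 1: 8 bytes, element 2 untouched
    }
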
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
@@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the pointer is not the right type, insert a bitcast to the right
// type.
- const Type *NewTy =
+ Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
if (OtherPtr->getType() != NewTy)
@@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
OtherPtr->getName()+"."+Twine(i),
MI);
uint64_t EltOffset;
- const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
- const Type *OtherTy = OtherPtrTy->getElementType();
- if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ Type *OtherTy = OtherPtrTy->getElementType();
+ if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
} else {
- const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
@@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
Value *EltPtr = NewElts[i];
- const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+ Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
@@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy->getScalarType();
+ Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
@@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// If the requested value was a vector constant, create it.
- if (EltTy != ValTy) {
- unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ if (EltTy->isVectorTy()) {
+ unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
StoreVal = ConstantVector::get(Elts);
}
@@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
@@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
- const Type *FieldTy = EltSTy->getElementType(i);
+ Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
if (TD->isBigEndian())
@@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
} else {
- const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
- const Type *ArrayEltTy = ATy->getElementType();
+ ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ Type *ArrayEltTy = ATy->getElementType();
uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
@@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
@@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
uint64_t ArrayEltBitOffset = 0;
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = TD->getStructLayout(EltSTy);
} else {
- const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
}
@@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
Value *SrcField = NewElts[i];
- const Type *FieldTy =
+ Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
// Ignore zero-sized fields like {}; they obviously contain no data.
if (FieldSizeBits == 0) continue;
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
-static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+static bool HasPadding(Type *Ty, const TargetData &TD) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
// SROA currently handles only Arrays and Structs.
- const StructType *STy = cast<StructType>(Ty);
+ StructType *STy = cast<StructType>(Ty);
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// and fusion code.
if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
// If the struct/array just has one element, use basic SRoA.
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
if (ST->getNumElements() > 1) return false;
} else {
if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
@@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Simple (non-volatile, non-atomic) loads are always ok; ignore them.
- if (LI->isVolatile()) return false;
+ if (!LI->isSimple()) return false;
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 7c415e5..fbb9465 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -134,7 +134,7 @@ namespace {
struct StrCatOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcat" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -184,7 +184,7 @@ struct StrCatOpt : public LibCallOptimization {
struct StrNCatOpt : public StrCatOpt {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncat" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -232,7 +232,7 @@ struct StrNCatOpt : public StrCatOpt {
struct StrChrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strchr" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -282,7 +282,7 @@ struct StrChrOpt : public LibCallOptimization {
struct StrRChrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strrchr" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -323,7 +323,7 @@ struct StrRChrOpt : public LibCallOptimization {
struct StrCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcmp" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -338,16 +338,17 @@ struct StrCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
return ConstantInt::get(CI->getType(),
- strcmp(Str1.c_str(),Str2.c_str()));
+ StringRef(Str1).compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
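
The reordering above also fixes a sign bug: strcmp("", x) compares '\0' against x[0], so the folded result must be the negation of x's first byte, not the byte itself. A quick standalone check of the sign:

    #include <cstdio>
    #include <cstring>

    int main() {
      const char *X = "abc";
      int Folded = -(unsigned char)X[0]; // the new -*x form
      int Sign   = (strcmp("", X) > 0) - (strcmp("", X) < 0);
      std::printf("strcmp sign %d, folded sign %d\n",
                  Sign, (Folded > 0) - (Folded < 0)); // both -1
      return 0;
    }
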
@@ -371,7 +372,7 @@ struct StrCmpOpt : public LibCallOptimization {
struct StrNCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncmp" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -400,16 +401,20 @@ struct StrNCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+ // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = StringRef(Str1).substr(0, Length);
+ StringRef SubStr2 = StringRef(Str2).substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(),
- strncmp(Str1.c_str(), Str2.c_str(), Length));
return 0;
}
};
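
Folding strncmp of two known strings now truncates both to the length bound before comparing, matching the sign (though not necessarily the magnitude) of the libc result. A standalone model using std::string_view in place of StringRef (C++17):

    #include <cstdio>
    #include <cstring>
    #include <string_view>

    int main() {
      std::string_view S1 = "hello", S2 = "help";
      size_t N = 3;
      int Folded = S1.substr(0, N).compare(S2.substr(0, N)); // prefix compare
      int Libc   = strncmp(S1.data(), S2.data(), N);
      std::printf("signs: %d %d\n", (Folded > 0) - (Folded < 0),
                  (Libc > 0) - (Libc < 0));                  // both 0
      return 0;
    }
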
@@ -426,7 +431,7 @@ struct StrCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcpy" function prototype.
unsigned NumParams = OptChkCall ? 3 : 2;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != NumParams ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -462,7 +467,7 @@ struct StrCpyOpt : public LibCallOptimization {
struct StrNCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -511,7 +516,7 @@ struct StrNCpyOpt : public LibCallOptimization {
struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -537,7 +542,7 @@ struct StrLenOpt : public LibCallOptimization {
struct StrPBrkOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -575,7 +580,7 @@ struct StrPBrkOpt : public LibCallOptimization {
struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy())
@@ -597,7 +602,7 @@ struct StrToOpt : public LibCallOptimization {
struct StrSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -626,7 +631,7 @@ struct StrSpnOpt : public LibCallOptimization {
struct StrCSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -658,7 +663,7 @@ struct StrCSpnOpt : public LibCallOptimization {
struct StrStrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -722,7 +727,7 @@ struct StrStrOpt : public LibCallOptimization {
struct MemCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy(32))
@@ -773,7 +778,7 @@ struct MemCpyOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -795,7 +800,7 @@ struct MemMoveOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -817,7 +822,7 @@ struct MemSetOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
@@ -840,7 +845,7 @@ struct MemSetOpt : public LibCallOptimization {
struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
@@ -874,8 +879,8 @@ struct PowOpt : public LibCallOptimization {
Callee->getAttributes());
Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
return Sel;
}
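
The pow(x, 0.5) rewrite visible above computes fabs(sqrt(x)) so that pow(-0.0, 0.5) yields +0.0, and guards the one input sqrt gets wrong: pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN. A standalone model of the select:

    #include <cmath>
    #include <cstdio>

    static double PowHalf(double X) {
      double S = std::fabs(std::sqrt(X));   // the sqrt + fabs pair
      return X == -INFINITY ? INFINITY : S; // the FCmpOEQ + Select pair
    }

    int main() {
      std::printf("%g %g\n", PowHalf(-0.0), std::pow(-0.0, 0.5));           // 0 0
      std::printf("%g %g\n", PowHalf(-INFINITY), std::pow(-INFINITY, 0.5)); // inf inf
      return 0;
    }
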
@@ -895,7 +900,7 @@ struct PowOpt : public LibCallOptimization {
struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
@@ -908,10 +913,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
}
if (LdExpArg) {
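
The sext/zext casts above feed an exp2-to-ldexp rewrite: when the operand of exp2 was produced from an integer, the result is an exact power-of-two scale. A small standalone illustration of the identity the rewrite relies on (demo only, assuming C99 math functions):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  // For integral n, exp2((double)n) == ldexp(1.0, n); ldexp only adjusts
  // the exponent field, which is why the pass prefers it.
  for (int n = -3; n <= 3; ++n)
    printf("n=%d  exp2=%g  ldexp=%g\n", n, exp2((double)n), ldexp(1.0, n));
  return 0;
}
```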
@@ -946,7 +951,7 @@ struct Exp2Opt : public LibCallOptimization {
struct UnaryDoubleFPOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
return 0;
@@ -973,7 +978,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
struct FFSOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
    // Just make sure this takes a single integer argument and produces an
    // i32 result.
if (FT->getNumParams() != 1 ||
@@ -996,10 +1001,10 @@ struct FFSOpt : public LibCallOptimization {
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
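
The cttz/add/select sequence built here is the standard ffs identity. A self-contained model, using the GCC/Clang __builtin_ctz as a stand-in for the llvm.cttz intrinsic:

```cpp
#include <cstdio>

// ffs(x) == (x == 0) ? 0 : cttz(x) + 1.  The zero guard matters because
// cttz (like __builtin_ctz) is undefined on a zero input.
static int ffs_model(int x) {
  return x == 0 ? 0 : __builtin_ctz((unsigned)x) + 1;
}

int main() {
  const int xs[] = {0, 1, 2, 8, 12, -16};
  for (int x : xs)
    printf("ffs(%d) = %d\n", x, ffs_model(x));
  return 0;
}
```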
@@ -1009,7 +1014,7 @@ struct FFSOpt : public LibCallOptimization {
struct IsDigitOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
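
Once the integer(i32) prototype is confirmed, isdigit folds to a single unsigned range compare. A sketch of that lowering (the transform body sits outside this hunk, so this models the usual rewrite rather than quoting it):

```cpp
#include <cstdio>

// isdigit(c) lowers to one unsigned comparison: (c - '0') <u 10.
static bool isdigit_model(int c) {
  return (unsigned)(c - '0') < 10u;
}

int main() {
  const int cs[] = {'0', '5', '9', 'a', ' '};
  for (int c : cs)
    printf("'%c' -> %d\n", c, isdigit_model(c));
  return 0;
}
```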
@@ -1028,7 +1033,7 @@ struct IsDigitOpt : public LibCallOptimization {
struct IsAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
@@ -1046,7 +1051,7 @@ struct IsAsciiOpt : public LibCallOptimization {
struct AbsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
@@ -1067,7 +1072,7 @@ struct AbsOpt : public LibCallOptimization {
struct ToAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isIntegerTy(32))
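
The i32(i32) check gates a one-instruction rewrite. A sketch of what toascii becomes, assuming the classic libc definition (the transform body itself is outside this hunk):

```cpp
#include <cstdio>

// toascii(c) reduces to masking off everything above the low 7 bits.
static int toascii_model(int c) {
  return c & 0x7f;
}

int main() {
  const int cs[] = {'A', 0x80 | 'A', 0x1ff};
  for (int c : cs)
    printf("0x%x -> 0x%x\n", c, toascii_model(c));
  return 0;
}
```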
@@ -1147,7 +1152,7 @@ struct PrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
@@ -1241,7 +1246,7 @@ struct SPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer arguments and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -1272,7 +1277,7 @@ struct SPrintFOpt : public LibCallOptimization {
struct FWriteOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require a pointer, an integer, an integer, a pointer, returning integer.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
!FT->getParamType(2)->isIntegerTy() ||
@@ -1310,7 +1315,7 @@ struct FPutsOpt : public LibCallOptimization {
if (!TD) return 0;
// Require two pointers. Also, we can't optimize if return value is used.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
@@ -1379,7 +1384,7 @@ struct FPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
    // Require two fixed parameters as pointers and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -1410,7 +1415,7 @@ struct FPrintFOpt : public LibCallOptimization {
struct PutsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
@@ -1685,7 +1690,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- const FunctionType *FTy = F.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
StringRef Name = F.getName();
switch (Name[0]) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 705f442..c83f56c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -153,9 +153,13 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
SmallPtrSet<Instruction *, 8> &Stores) {
- if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
- if (L->isVolatile()) return false;
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
AliasAnalysis::Location Loc = AA->getLocation(L);
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
@@ -163,11 +167,6 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return false;
}
- if (Inst->mayWriteToMemory()) {
- Stores.insert(Inst);
- return false;
- }
-
if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
return false;
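
The reordering in Sink.cpp is behavioral, not cosmetic: testing mayWriteToMemory() first also rejects volatile loads, since a volatile load reports itself as writing memory, which is why the explicit isVolatile() check could be dropped. A toy model of the new control flow (illustrative types only, not the LLVM API):

```cpp
#include <cstdio>
#include <set>

// Toy stand-in for an instruction; names here are illustrative only.
struct Instr {
  bool writes_memory;  // true for stores, calls, and volatile loads
  bool is_load;
};

// Sketch of the reordered isSafeToMove: memory writers are filtered out
// (and recorded) before the load check, so volatile loads never reach
// the alias query, which is elided here.
static bool isSafeToMoveModel(Instr *I, std::set<Instr *> &Stores) {
  if (I->writes_memory) {
    Stores.insert(I);
    return false;
  }
  if (I->is_load && !Stores.empty())
    return false;  // conservative stand-in for the per-store alias query
  return true;
}

int main() {
  std::set<Instr *> Stores;
  Instr PlainLoad = {false, true};
  Instr VolatileLoad = {true, true};
  bool LoadOK = isSafeToMoveModel(&PlainLoad, Stores);        // true
  bool VolatileOK = isSafeToMoveModel(&VolatileLoad, Stores); // false
  printf("plain=%d volatile=%d\n", LoadOK, VolatileOK);
  return 0;
}
```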
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
deleted file mode 100644
index 9dd83c0..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs a limited form of tail duplication, intended to simplify
-// CFGs by removing some unconditional branches. This pass is necessary to
-// straighten out loops created by the C front-end, but also is capable of
-// making other code nicer. After this pass is run, the CFG simplify pass
-// should be run to clean up the mess.
-//
-// This pass could be enhanced in the future to use profile information to be
-// more aggressive.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "tailduplicate"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constant.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
-
-static cl::opt<unsigned>
-TailDupThreshold("taildup-threshold",
- cl::desc("Max block size to tail duplicate"),
- cl::init(1), cl::Hidden);
-
-namespace {
- class TailDup : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(ID) {
- initializeTailDupPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
- inline void eliminateUnconditionalBranch(BranchInst *BI);
- SmallPtrSet<BasicBlock*, 4> CycleDetector;
- };
-}
-
-char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
-
-// Public interface to the Tail Duplication pass
-FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
-
-/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
-/// the function, eliminating it if it looks attractive enough. CycleDetector
-/// prevents infinite loops by checking that we aren't redirecting a branch to
-/// a place it already pointed to earlier; see PR 2323.
-bool TailDup::runOnFunction(Function &F) {
- bool Changed = false;
- CycleDetector.clear();
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- if (shouldEliminateUnconditionalBranch(I->getTerminator(),
- TailDupThreshold)) {
- eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
- Changed = true;
- } else {
- ++I;
- CycleDetector.clear();
- }
- }
- return Changed;
-}
-
-/// shouldEliminateUnconditionalBranch - Return true if this branch looks
-/// attractive to eliminate. We eliminate the branch if the destination basic
-/// block has <= 5 instructions in it, not counting PHI nodes. In practice,
-/// since one of these is a terminator instruction, this means that we will add
-/// up to 4 instructions to the new block.
-///
-/// We don't count PHI nodes in the count since they will be removed when the
-/// contents of the block are copied over.
-///
-bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
- unsigned Threshold) {
- BranchInst *BI = dyn_cast<BranchInst>(TI);
- if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
-
- BasicBlock *Dest = BI->getSuccessor(0);
- if (Dest == BI->getParent()) return false; // Do not loop infinitely!
-
- // Do not inline a block if we will just get another branch to the same block!
- TerminatorInst *DTI = Dest->getTerminator();
- if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
- if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
- return false; // Do not loop infinitely!
-
- // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
- // because doing so would require breaking critical edges. This should be
- // fixed eventually.
- if (!DTI->use_empty())
- return false;
-
- // Do not bother with blocks with only a single predecessor: simplify
- // CFG will fold these two blocks together!
- pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
- ++PI;
- if (PI == PE) return false; // Exactly one predecessor!
-
- BasicBlock::iterator I = Dest->getFirstNonPHI();
-
- for (unsigned Size = 0; I != Dest->end(); ++I) {
- if (Size == Threshold) return false; // The block is too large.
-
- // Don't tail duplicate call instructions. They are very large compared to
- // other instructions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
-
- // Also alloca and malloc.
- if (isa<AllocaInst>(I)) return false;
-
- // Some vector instructions can expand into a number of instructions.
- if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
- isa<InsertElementInst>(I)) return false;
-
- // Only count instructions that are not debugger intrinsics.
- if (!isa<DbgInfoIntrinsic>(I)) ++Size;
- }
-
- // Do not tail duplicate a block that has thousands of successors into a block
- // with a single successor if the block has many other predecessors. This can
- // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
- // cases that have a large number of indirect gotos.
- unsigned NumSuccs = DTI->getNumSuccessors();
- if (NumSuccs > 8) {
- unsigned TooMany = 128;
- if (NumSuccs >= TooMany) return false;
- TooMany = TooMany/NumSuccs;
- for (; PI != PE; ++PI)
- if (TooMany-- == 0) return false;
- }
-
- // If this unconditional branch is a fall-through, be careful about
- // tail duplicating it. In particular, we don't want to taildup it if the
- // original block will still be there after taildup is completed: doing so
- // would eliminate the fall-through, requiring unconditional branches.
- Function::iterator DestI = Dest;
- if (&*--DestI == BI->getParent()) {
- // The uncond branch is a fall-through. Tail duplication of the block
- // will eliminate the fall-through-ness and end up cloning the terminator
- // at the end of the Dest block. Since the original Dest block will
- // continue to exist, this means that one or the other will not be able to
- // fall through. One typical example that this helps with is code like:
- // if (a)
- // foo();
- // if (b)
- // foo();
- // Cloning the 'if b' block into the end of the first foo block is messy.
-
- // The messy case is when the fall-through block falls through to other
- // blocks. This is what we would be preventing if we cloned the block.
- DestI = Dest;
- if (++DestI != Dest->getParent()->end()) {
- BasicBlock *DestSucc = DestI;
- // If any of Dest's successors are fall-throughs, don't do this xform.
- for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
- SI != SE; ++SI)
- if (*SI == DestSucc)
- return false;
- }
- }
-
- // Finally, check that we haven't redirected to this target block earlier;
- // there are cases where we loop forever if we don't check this (PR 2323).
- if (!CycleDetector.insert(Dest))
- return false;
-
- return true;
-}
-
-/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
-/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
-/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
-/// DstBlock, return it.
-static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
- BasicBlock *DstBlock) {
- // SrcBlock must have a single predecessor.
- pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
- if (PI == PE || ++PI != PE) return 0;
-
- BasicBlock *SrcPred = *pred_begin(SrcBlock);
-
- // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
- // there is only one other pred, get it, otherwise we can't handle it.
- PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
- BasicBlock *DstOtherPred = 0;
- BasicBlock *P = *PI;
- if (P == SrcBlock) {
- if (++PI == PE) return 0;
- DstOtherPred = *PI;
- if (++PI != PE) return 0;
- } else {
- DstOtherPred = P;
- if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
- }
-
- // We can handle two situations here: "if then" and "if then else" blocks. An
- // 'if then' situation is just where DstOtherPred == SrcPred.
- if (DstOtherPred == SrcPred)
- return SrcPred;
-
- // Check to see if we have an "if then else" situation, which means that
- // DstOtherPred will have a single predecessor and it will be SrcPred.
- PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
- if (PI != PE && *PI == SrcPred) {
- if (++PI != PE) return 0; // Not a single pred.
- return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
- }
-
- // Otherwise, this is something we can't handle.
- return 0;
-}
-
-
-/// eliminateUnconditionalBranch - Clone the instructions from the destination
-/// block into the source block, eliminating the specified unconditional branch.
-/// If the destination block defines values used by successors of the dest
-/// block, we may need to insert PHI nodes.
-///
-void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
- BasicBlock *SourceBlock = Branch->getParent();
- BasicBlock *DestBlock = Branch->getSuccessor(0);
- assert(SourceBlock != DestBlock && "Our predicate is broken!");
-
- DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
- << "]: Eliminating branch: " << *Branch);
-
- // See if we can avoid duplicating code by moving it up to a dominator of both
- // blocks.
- if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
- DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
-
- // If there are non-phi instructions in DestBlock that have no operands
- // defined in DestBlock, and if the instruction has no side effects, we can
- // move the instruction to DomBlock instead of duplicating it.
- BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
- while (!isa<TerminatorInst>(BBI)) {
- Instruction *I = BBI++;
-
- bool CanHoist = I->isSafeToSpeculativelyExecute() &&
- !I->mayReadFromMemory();
- if (CanHoist) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
- if (OpI->getParent() == DestBlock ||
- (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
- CanHoist = false;
- break;
- }
- if (CanHoist) {
- // Remove from DestBlock, move right before the term in DomBlock.
- DestBlock->getInstList().remove(I);
- DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
- DEBUG(dbgs() << "Hoisted: " << *I);
- }
- }
- }
- }
-
- // Tail duplication cannot update SSA properties correctly if the values
- // defined in the duplicated tail are used outside of the tail itself. For
- // this reason, we spill all values that are used outside of the tail to the
- // stack.
- for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
- if (I->isUsedOutsideOfBlock(DestBlock)) {
- // We found a use outside of the tail. Create a new stack slot to
- // break this inter-block usage pattern.
- DemoteRegToStack(*I);
- }
-
- // We are going to have to map operands from the original block B to the new
- // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
- // nodes also define part of this mapping. Loop over these PHI nodes, adding
- // them to our mapping.
- //
- std::map<Value*, Value*> ValueMapping;
-
- BasicBlock::iterator BI = DestBlock->begin();
- bool HadPHINodes = isa<PHINode>(BI);
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
-
- // Clone the non-phi instructions of the dest block into the source block,
- // keeping track of the mapping...
- //
- for (; BI != DestBlock->end(); ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- SourceBlock->getInstList().push_back(New);
- ValueMapping[BI] = New;
- }
-
- // Now that we have built the mapping information and cloned all of the
- // instructions (giving us a new terminator, among other things), walk the new
- // instructions, rewriting references of old instructions to use new
- // instructions.
- //
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- for (; BI != SourceBlock->end(); ++BI)
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- std::map<Value*, Value*>::const_iterator I =
- ValueMapping.find(BI->getOperand(i));
- if (I != ValueMapping.end())
- BI->setOperand(i, I->second);
- }
-
- // Next we check to see if any of the successors of DestBlock had PHI nodes.
- // If so, we need to add entries to the PHI nodes for SourceBlock now.
- for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
- SI != SE; ++SI) {
- BasicBlock *Succ = *SI;
- for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
- PHINode *PN = cast<PHINode>(PNI);
- // Ok, we have a PHI node. Figure out what the incoming value was for the
- // DestBlock.
- Value *IV = PN->getIncomingValueForBlock(DestBlock);
-
- // Remap the value if necessary...
- std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
- if (I != ValueMapping.end())
- IV = I->second;
- PN->addIncoming(IV, SourceBlock);
- }
- }
-
- // Next, remove the old branch instruction, and any PHI node entries that we
- // had.
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
- SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
-
- // Final step: now that we have finished everything up, walk the cloned
- // instructions one last time, constant propagating and DCE'ing them, because
- // they may not be needed anymore.
- //
- if (HadPHINodes) {
- while (BI != SourceBlock->end()) {
- Instruction *Inst = BI++;
- if (isInstructionTriviallyDead(Inst))
- Inst->eraseFromParent();
- else if (Value *V = SimplifyInstruction(Inst)) {
- Inst->replaceAllUsesWith(V);
- Inst->eraseFromParent();
- }
- }
- }
-
- ++NumEliminated; // We just killed a branch!
-}
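
For context on the deleted pass: a source-level caricature of tail duplication. The pass worked on LLVM IR, so this C++ only sketches the CFG effect of cloning a shared tail into its predecessors:

```cpp
#include <cstdio>

// Before: both arms branch unconditionally into a shared tail block.
static int before(bool a) {
  int x;
  if (a) x = 1; else x = 2;
  return x + 10;  // shared tail, reached by two unconditional branches
}

// After tail duplication: the tail is cloned into each predecessor, the
// unconditional branches (and the phi for x in IR) disappear, and later
// passes can simplify each copy independently.
static int after(bool a) {
  if (a) return 1 + 10;
  return 2 + 10;
}

int main() {
  printf("%d %d\n", before(true), after(false));
  return 0;
}
```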