Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 226
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 131
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 80
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 391
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 74
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 42
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 50
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 264
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 178
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp | 658
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 1426
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp | 29
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 467
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 76
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 189
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 174
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 12
28 files changed, 2851 insertions, 1685 deletions
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index ba214d1..b344952 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -9,7 +9,7 @@
//
// This file implements the Aggressive Dead Code Elimination pass. This pass
// optimistically assumes that all instructions are dead until proven otherwise,
-// allowing it to eliminate dead computations that other DCE passes do not
+// allowing it to eliminate dead computations that other DCE passes do not
// catch, particularly involving loop computations.
//
//===----------------------------------------------------------------------===//
@@ -36,13 +36,13 @@ namespace {
ADCE() : FunctionPass(ID) {
initializeADCEPass(*PassRegistry::getPassRegistry());
}
-
+
virtual bool runOnFunction(Function& F);
-
+
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
AU.setPreservesCFG();
}
-
+
};
}
@@ -52,7 +52,7 @@ INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
SmallVector<Instruction*, 128> worklist;
-
+
// Collect the set of "root" instructions that are known live.
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isa<TerminatorInst>(I.getInstructionIterator()) ||
@@ -62,7 +62,7 @@ bool ADCE::runOnFunction(Function& F) {
alive.insert(I.getInstructionIterator());
worklist.push_back(I.getInstructionIterator());
}
-
+
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
Instruction* curr = worklist.pop_back_val();
@@ -72,7 +72,7 @@ bool ADCE::runOnFunction(Function& F) {
if (alive.insert(Inst))
worklist.push_back(Inst);
}
-
+
// The inverse of the live set is the dead set. These are those instructions
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
@@ -82,7 +82,7 @@ bool ADCE::runOnFunction(Function& F) {
worklist.push_back(I.getInstructionIterator());
I->dropAllReferences();
}
-
+
for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
++NumRemoved;
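
The ADCE hunks above are whitespace cleanup only, but the code they touch spells out the pass's mark-and-sweep scheme: seed the live set with roots (terminators, debug intrinsics, landing pads, and instructions with side effects), propagate liveness backwards through operands, then delete everything never marked. A rough source-level sketch, not from this commit and with made-up names, of the kind of dead loop computation ADCE removes where a use-count-based DCE cannot, because every instruction in the loop-carried cycle still has a user:

int live_work(const int *a, int n) {
  int sum = 0;                 // dead: only feeds the loop-carried cycle
  for (int i = 0; i < n; ++i)
    sum += a[i];               // each add is used by the next iteration's phi
  return 2 * n;                // the only value the caller ever sees
}
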
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index d660c72..a01e066 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -32,3 +32,5 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
+
+add_dependencies(LLVMScalarOpts intrinsics_gen)
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9a5423f..bc87106 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,32 +18,32 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -60,6 +60,7 @@ STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -70,6 +71,10 @@ static cl::opt<bool> DisableDeleteDeadBlocks(
"disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
+static cl::opt<bool> DisableSelectToBranch(
+ "disable-cgp-select2branch", cl::Hidden, cl::init(false),
+ cl::desc("Disable select to branch conversion."));
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -78,7 +83,7 @@ namespace {
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
-
+
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
/// update it.
@@ -93,6 +98,9 @@ namespace {
/// be updated.
bool ModifiedDT;
+ /// OptSize - True if optimizing for size.
+ bool OptSize;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -108,6 +116,7 @@ namespace {
}
private:
+ bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
@@ -118,6 +127,7 @@ namespace {
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
+ bool OptimizeSelectInst(SelectInst *SI);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
bool PlaceDbgValues(Function &F);
};
@@ -141,13 +151,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
+ OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
// llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
+ // handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
EverMadeChange |= PlaceDbgValues(F);
@@ -182,6 +193,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
DeleteDeadBlock(*I);
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
+
if (MadeChange)
ModifiedDT = true;
EverMadeChange |= MadeChange;
@@ -193,6 +209,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ if (!SinglePred || SinglePred == BB) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ DEBUG(dbgs() << "Merged:\n"<< *SinglePred << "\n\n\n");
+ }
+ }
+ return Changed;
+}
+
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
@@ -326,7 +375,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
-
+
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
return;
}
@@ -537,7 +586,7 @@ protected:
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
BasicBlock *BB = CI->getParent();
-
+
// Lower inline assembly if we can.
// If we found an inline asm expession, and if the target knows how to
// lower it to normal LLVM code, do so now.
@@ -554,19 +603,19 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
if (OptimizeInlineAsmInst(CI))
return true;
}
-
+
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
WeakVH IterHandle(CurInstIterator);
-
+
replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
TLInfo, ModifiedDT ? 0 : DT);
@@ -594,13 +643,13 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
-
+
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// that have the default "don't know" as the objectsize. Anything else
// should be left alone.
CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD);
+ return Simplifier.fold(CI, TD, TLInfo);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -635,10 +684,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (!TLI)
return false;
+ PHINode *PN = 0;
+ BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
- PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
- if (V && !PN)
- return false;
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
@@ -656,6 +713,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (PN) {
BasicBlock::iterator BI = BB->begin();
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
if (&*BI != RI)
return false;
} else {
@@ -750,13 +810,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy) {
Value *Repl = Addr;
-
- // Try to collapse single-value PHI nodes. This is necessary to undo
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
// unprofitable PRE transformations.
SmallVector<Value*, 8> worklist;
SmallPtrSet<Value*, 16> Visited;
worklist.push_back(Addr);
-
+
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
@@ -768,20 +828,20 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
while (!worklist.empty()) {
Value *V = worklist.back();
worklist.pop_back();
-
+
// Break use-def graph loops.
if (!Visited.insert(V)) {
Consensus = 0;
break;
}
-
+
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
worklist.push_back(P->getIncomingValue(i));
continue;
}
-
+
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
@@ -816,15 +876,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
continue;
}
-
+
Consensus = 0;
break;
}
-
+
// If the addressing mode couldn't be determined, or if multiple different
// ones were determined, bail out now.
if (!Consensus) return false;
-
+
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
@@ -933,7 +993,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Use a WeakVH to hold onto it in case this happens.
WeakVH IterHandle(CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
-
+
RecursivelyDeleteTriviallyDeadInstructions(Repl);
if (IterHandle != CurInstIterator) {
@@ -945,7 +1005,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// This address is now available for reassignment, so erase the table
// entry; we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
- }
+ }
}
++NumMemoryInsts;
return true;
@@ -957,12 +1017,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
- TargetLowering::AsmOperandInfoVector
+ TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI->ParseConstraints(CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
@@ -1091,6 +1151,79 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
return MadeChange;
}
+/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
+/// turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+ // FIXME: This should use the same heuristics as IfConversion to determine
+ // whether a select is better represented as a branch. This requires that
+ // branch probability metadata is preserved for the select, which is not the
+ // case currently.
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+ // If the branch is predicted right, an out of order CPU can avoid blocking on
+ // the compare. Emit cmovs on compares with a memory operand as branches to
+ // avoid stalls on the load from memory. If the compare has more than one use
+ // there's probably another cmov or setcc around so it's not worth emitting a
+ // branch.
+ if (!Cmp)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0);
+ Value *CmpOp1 = Cmp->getOperand(1);
+
+ // We check that the memory operand has one use to avoid uses of the loaded
+ // value directly after the compare, making branches unprofitable.
+ return Cmp->hasOneUse() &&
+ ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+}
+
+
+bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+ // If we have a SelectInst that will likely profit from branch prediction,
+ // turn it into a branch.
+ if (DisableSelectToBranch || OptSize || !TLI ||
+ !TLI->isPredictableSelectExpensive())
+ return false;
+
+ if (!SI->getCondition()->getType()->isIntegerTy(1) ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+
+ ModifiedDT = true;
+
+ // First, we split the block containing the select into 2 blocks.
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+ // Create a new block serving as the landing pad for the branch.
+ BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
+ NextBlock->getParent(), NextBlock);
+
+ // Move the unconditional branch from the block with the select in it into our
+ // landing pad block.
+ StartBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(NextBlock, SmallBlock);
+
+ // Insert the real conditional branch based on the original condition.
+ BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+ PN->takeName(SI);
+ PN->addIncoming(SI->getTrueValue(), StartBlock);
+ PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+
+ // Instruct OptimizeBlock to skip to the next block.
+ CurInstIterator = StartBlock->end();
+ ++NumSelectsExpanded;
+ return true;
+}
+
bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
@@ -1104,7 +1237,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CastInst *CI = dyn_cast<CastInst>(I)) {
// If the source of the cast is a constant, then this should have
// already been constant folded. The only reason NOT to constant fold
@@ -1124,23 +1257,23 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return OptimizeCmpExpression(CI);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
return false;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (TLI)
return OptimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType());
return false;
}
-
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
@@ -1154,13 +1287,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
}
return false;
}
-
+
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
return DupRetToEnableTailCallOpts(RI);
+ if (SelectInst *SI = dyn_cast<SelectInst>(I))
+ return OptimizeSelectInst(SI);
+
return false;
}
@@ -1179,7 +1315,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
}
// llvm.dbg.value is far away from the value then iSel may not be able
-// handle it properly. iSel will drop llvm.dbg.value if it can not
+// handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
bool MadeChange = false;
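
The most substantial additions to CodeGenPrepare above are EliminateFallThrough and the select-to-branch conversion. OptimizeSelectInst only fires when the target reports predictable selects as expensive and isFormingBranchFromSelectProfitable sees a one-use compare with a one-use load operand; the select is then rewritten as a conditional branch into a small "select.mid" block plus a PHI at the start of "select.end". A rough source-level sketch of code matching that heuristic (illustrative only, not from the commit; names are made up):

int pick(const int *p, int a, int b) {
  // The compare's only memory operand is a single-use load, and the compare's
  // only use is the select, so the new OptimizeSelectInst would split this
  // select into a branch + PHI, letting a well-predicted branch hide the load
  // latency instead of a cmov stalling on it.
  return (*p < 42) ? a : b;
}
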
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c8c5360..8b1283f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -32,7 +32,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -71,7 +71,7 @@ namespace {
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallPtrSet<Value*, 16> &DeadStackObjects);
+ SmallSetVector<Value*, 16> &DeadStackObjects);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -106,7 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
- SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallSetVector<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -136,7 +136,7 @@ static void DeleteDeadInstruction(Instruction *I,
DeadInst->eraseFromParent();
- if (ValueSet) ValueSet->erase(DeadInst);
+ if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
@@ -248,7 +248,7 @@ static bool isShortenable(Instruction *I) {
// Don't shorten stores for now
if (isa<StoreInst>(I))
return false;
-
+
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: return false;
@@ -275,33 +275,9 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
- const TargetData *TD = AA.getTargetData();
-
- if (const CallInst *CI = extractMallocCall(V)) {
- if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
- return C->getZExtValue();
- }
-
- if (TD == 0)
- return AliasAnalysis::UnknownSize;
-
- if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) {
- // Get size information for the alloca
- if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
- return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- }
-
- if (const Argument *A = dyn_cast<Argument>(V)) {
- if (A->hasByValAttr())
- if (PointerType *PT = dyn_cast<PointerType>(A->getType()))
- return TD->getTypeAllocSize(PT->getElementType());
- }
-
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->mayBeOverridden())
- return TD->getTypeAllocSize(GV->getType()->getElementType());
- }
-
+ uint64_t Size;
+ if (getObjectSize(V, Size, AA.getTargetData()))
+ return Size;
return AliasAnalysis::UnknownSize;
}
@@ -316,7 +292,7 @@ namespace {
/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
/// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
const AliasAnalysis::Location &Earlier,
@@ -339,7 +315,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
if (AA.getTargetData() == 0 &&
Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
-
+
return OverwriteUnknown;
}
@@ -402,10 +378,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size > Earlier.Size &&
+ Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
return OverwriteComplete;
-
+
// The other interesting case is if the later store overwrites the end of
// the earlier store
//
@@ -544,11 +520,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) &&
+ if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+ DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -557,7 +533,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
-
+
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = Inst;
@@ -575,16 +551,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
unsigned DepWriteAlign = DepIntrinsic->getAlignment();
if (llvm::isPowerOf2_64(InstWriteOffset) ||
((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
+
DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", "
<< DepLoc.Size << ")"
<< *Inst << '\n');
-
+
Value* DepWriteLength = DepIntrinsic->getLength();
Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
- InstWriteOffset -
+ InstWriteOffset -
DepWriteOffset);
DepIntrinsic->setLength(TrimmedLength);
MadeChange = true;
@@ -694,19 +670,18 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Keep track of all of the stack objects that are dead at the end of the
// function.
- SmallPtrSet<Value*, 16> DeadStackObjects;
+ SmallSetVector<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- DeadStackObjects.insert(AI);
+ if (isa<AllocaInst>(I))
+ DeadStackObjects.insert(I);
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- if (CallInst *CI = extractMallocCall(I))
- if (!PointerMayBeCaptured(CI, true, true))
- DeadStackObjects.insert(CI);
+ else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+ DeadStackObjects.insert(I);
}
// Treat byval arguments the same, stores to them are dead at the end of the
@@ -723,14 +698,30 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If we find a store, check to see if it points into a dead stack value.
if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
- Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+ SmallVector<Value *, 4> Pointers;
+ GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
// Stores to stack values are valid candidates for removal.
- if (DeadStackObjects.count(Pointer)) {
+ bool AllDead = true;
+ for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ E = Pointers.end(); I != E; ++I)
+ if (!DeadStackObjects.count(*I)) {
+ AllDead = false;
+ break;
+ }
+
+ if (AllDead) {
Instruction *Dead = BBI++;
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
- << *Dead << "\n Object: " << *Pointer << '\n');
+ << *Dead << "\n Objects: ";
+ for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ E = Pointers.end(); I != E; ++I) {
+ dbgs() << **I;
+ if (llvm::next(I) != E)
+ dbgs() << ", ";
+ }
+ dbgs() << '\n');
// DCE instructions only used to calculate that store.
DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
@@ -749,17 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
- DeadStackObjects.erase(A);
- continue;
- }
-
- if (CallInst *CI = extractMallocCall(BBI)) {
- DeadStackObjects.erase(CI);
+ if (isa<AllocaInst>(BBI)) {
+ // Remove allocas from the list of dead stack objects; there can't be
+ // any references before the definition.
+ DeadStackObjects.remove(BBI);
continue;
}
if (CallSite CS = cast<Value>(BBI)) {
+ // Remove allocation function calls from the list of dead stack objects;
+ // there can't be any references before the definition.
+ if (isAllocLikeFn(BBI))
+ DeadStackObjects.remove(BBI);
+
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
@@ -768,7 +761,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
- for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the call site touches it.
AliasAnalysis::ModRefResult A =
@@ -780,12 +773,12 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.erase(*I);
+ DeadStackObjects.remove(*I);
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
- return MadeChange;
+ break;
continue;
}
@@ -827,7 +820,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ SmallSetVector<Value*, 16> &DeadStackObjects) {
const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
// A constant can't be in the dead pointer set.
@@ -837,12 +830,12 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
- DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
return;
}
SmallVector<Value*, 16> NowLive;
- for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the loaded location could alias the stack location.
AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
@@ -852,5 +845,5 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
I != E; ++I)
- DeadStackObjects.erase(*I);
+ DeadStackObjects.remove(*I);
}
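
Three DeadStoreElimination changes above are worth calling out: getPointerSize now defers to getObjectSize instead of reimplementing the per-case size logic for mallocs, allocas, byval arguments and globals; DeadStackObjects becomes a SmallSetVector, presumably so iteration order (and therefore the order of removals and debug output) is deterministic; and the end-of-block scan now checks every underlying object of a stored pointer rather than a single one. A minimal sketch of the end-of-block case that handleEndBlock targets (illustrative only, not from the commit):

int square_plus_one(int x) {
  int tmp;        // stack object that never escapes the function
  tmp = x * x;    // dead store: nothing reads 'tmp' before the function ends,
                  // so DSE can delete it and the multiply feeding it
  return x + 1;
}
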
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index f3c92d6..9759549 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -39,7 +39,7 @@ static unsigned getHash(const void *V) {
}
//===----------------------------------------------------------------------===//
-// SimpleValue
+// SimpleValue
//===----------------------------------------------------------------------===//
namespace {
@@ -47,16 +47,16 @@ namespace {
/// scoped hash table.
struct SimpleValue {
Instruction *Inst;
-
+
SimpleValue(Instruction *I) : Inst(I) {
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}
-
+
bool isSentinel() const {
return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
}
-
+
static bool canHandle(Instruction *Inst) {
// This can only handle non-void readnone functions.
if (CallInst *CI = dyn_cast<CallInst>(Inst))
@@ -90,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> {
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
-
+
// Hash in all of the operands as pointers.
unsigned Res = 0;
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
@@ -126,13 +126,13 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
if (LHS.isSentinel() || RHS.isSentinel())
return LHSI == RHSI;
-
+
if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
return LHSI->isIdenticalTo(RHSI);
}
//===----------------------------------------------------------------------===//
-// CallValue
+// CallValue
//===----------------------------------------------------------------------===//
namespace {
@@ -140,21 +140,21 @@ namespace {
/// the scoped hash table.
struct CallValue {
Instruction *Inst;
-
+
CallValue(Instruction *I) : Inst(I) {
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
}
-
+
bool isSentinel() const {
return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
}
-
+
static bool canHandle(Instruction *Inst) {
// Don't value number anything that returns void.
if (Inst->getType()->isVoidTy())
return false;
-
+
CallInst *CI = dyn_cast<CallInst>(Inst);
if (CI == 0 || !CI->onlyReadsMemory())
return false;
@@ -168,7 +168,7 @@ namespace llvm {
template<> struct isPodLike<CallValue> {
static const bool value = true;
};
-
+
template<> struct DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
@@ -189,7 +189,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
"Cannot value number calls with metadata operands");
Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
}
-
+
// Mix in the opcode.
return (Res << 1) ^ Inst->getOpcode();
}
@@ -203,11 +203,11 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
//===----------------------------------------------------------------------===//
-// EarlyCSE pass.
+// EarlyCSE pass.
//===----------------------------------------------------------------------===//
namespace {
-
+
/// EarlyCSE - This pass does a simple depth-first walk over the dominator
/// tree, eliminating trivially redundant instructions and using instsimplify
/// to canonicalize things as it goes. It is intended to be fast and catch
@@ -223,14 +223,14 @@ public:
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
AllocatorTy> ScopedHTType;
-
+
/// AvailableValues - This scoped hash table contains the current values of
/// all of our simple scalar expressions. As we walk down the domtree, we
/// look to see if instructions are in this: if so, we replace them with what
/// we find, otherwise we insert them so that dominated values can succeed in
/// their lookup.
ScopedHTType *AvailableValues;
-
+
/// AvailableLoads - This scoped hash table contains the current values
/// of loads. This allows us to get efficient access to dominating loads when
/// we have a fully redundant load. In addition to the most recent load, we
@@ -243,15 +243,15 @@ public:
typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
LoadHTType *AvailableLoads;
-
+
/// AvailableCalls - This scoped hash table contains the current values
/// of read-only call values. It uses the same generation count as loads.
typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
CallHTType *AvailableCalls;
-
+
/// CurrentGeneration - This is the current generation of the memory value.
unsigned CurrentGeneration;
-
+
static char ID;
explicit EarlyCSE() : FunctionPass(ID) {
initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
@@ -326,7 +326,7 @@ private:
};
bool processNode(DomTreeNode *Node);
-
+
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
@@ -350,7 +350,7 @@ INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
-
+
// If this block has a single predecessor, then the predecessor is the parent
// of the domtree node and all of the live out memory values are still current
// in this block. If this block has multiple predecessors, then they could
@@ -359,20 +359,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// predecessors.
if (BB->getSinglePredecessor() == 0)
++CurrentGeneration;
-
+
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there in no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
StoreInst *LastStore = 0;
-
+
bool Changed = false;
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
-
+
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
@@ -381,7 +381,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumSimplify;
continue;
}
-
+
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
@@ -392,7 +392,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumSimplify;
continue;
}
-
+
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(Inst)) {
// See if the instruction has an available value. If so, use it.
@@ -404,12 +404,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSE;
continue;
}
-
+
// Otherwise, just remember that this value is available.
AvailableValues->insert(Inst, Inst);
continue;
}
-
+
// If this is a non-volatile load, process it.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
@@ -417,7 +417,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore = 0;
continue;
}
-
+
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
@@ -431,18 +431,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSELoad;
continue;
}
-
+
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
LastStore = 0;
continue;
}
-
+
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
LastStore = 0;
-
+
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
@@ -457,19 +457,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
++NumCSECall;
continue;
}
-
+
// Otherwise, remember that we have this instruction.
AvailableCalls->insert(Inst,
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
continue;
}
-
+
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
if (Inst->mayWriteToMemory()) {
++CurrentGeneration;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
@@ -483,7 +483,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore = 0;
continue;
}
-
+
// Okay, we just invalidated anything we knew about loaded values. Try
// to salvage *something* by remembering that the stored value is a live
// version of the pointer. It is safe to forward from volatile stores
@@ -491,7 +491,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// the store.
AvailableLoads->insert(SI->getPointerOperand(),
std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
-
+
// Remember that this was the last store we saw for DSE.
if (SI->isSimple())
LastStore = SI;
@@ -509,7 +509,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
-
+
// Tables that the pass uses when walking the domtree.
ScopedHTType AVTable;
AvailableValues = &AVTable;
@@ -517,7 +517,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
AvailableLoads = &LoadTable;
CallHTType CallTable;
AvailableCalls = &CallTable;
-
+
CurrentGeneration = 0;
bool Changed = false;
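
The EarlyCSE hunks are whitespace cleanup, but the surrounding code documents the generation scheme that keeps the pass safe: loads and read-only calls are only reused while CurrentGeneration still matches the entry recorded in the scoped hash tables, and any instruction that may write memory bumps the generation. A rough source-level illustration (not from this commit; names are made up):

int reuse_ok(const int *p) {
  return *p + *p;              // second load sees the same generation: CSE'd
}

int reuse_blocked(int *p, void (*f)()) {
  int a = *p;
  f();                         // may write memory, so CurrentGeneration++
  return a + *p;               // different generation: the second load stays
}
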
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fb733ad..120175d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -18,8 +18,15 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
@@ -30,20 +37,14 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
using namespace PatternMatch;
@@ -59,6 +60,11 @@ static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+// Maximum allowed recursion depth.
+static cl::opt<uint32_t>
+MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
+ cl::desc("Max recurse depth (default = 1000)"));
+
//===----------------------------------------------------------------------===//
// ValueTable Class
//===----------------------------------------------------------------------===//
@@ -167,7 +173,7 @@ Expression ValueTable::create_expression(Instruction *I) {
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
}
-
+
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
// Sort the operand value numbers so x<y and y>x get the same value number.
CmpInst::Predicate Predicate = C->getPredicate();
@@ -181,7 +187,7 @@ Expression ValueTable::create_expression(Instruction *I) {
II != IE; ++II)
e.varargs.push_back(*II);
}
-
+
return e;
}
@@ -385,7 +391,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
-
+
Instruction* I = cast<Instruction>(V);
Expression exp;
switch (I->getOpcode()) {
@@ -501,17 +507,17 @@ namespace {
const TargetLibraryInfo *TLI;
ValueTable VN;
-
+
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
- BasicBlock *BB;
+ const BasicBlock *BB;
LeaderTableEntry *Next;
};
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
-
+
SmallVector<Instruction*, 8> InstrsToErase;
public:
static char ID; // Pass identification, replacement for typeid
@@ -521,14 +527,14 @@ namespace {
}
bool runOnFunction(Function &F);
-
+
/// markInstructionForDeletion - This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
VN.erase(I);
InstrsToErase.push_back(I);
}
-
+
const TargetData *getTargetData() const { return TD; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
@@ -536,32 +542,32 @@ namespace {
private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
Curr.BB = BB;
return;
}
-
+
LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
Node->Val = V;
Node->BB = BB;
Node->Next = Curr.Next;
Curr.Next = Node;
}
-
+
/// removeFromLeaderTable - Scan the list of values corresponding to a given
- /// value number, and remove the given value if encountered.
- void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ /// value number, and remove the given instruction if encountered.
+ void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
LeaderTableEntry* Prev = 0;
LeaderTableEntry* Curr = &LeaderTable[N];
- while (Curr->Val != V || Curr->BB != BB) {
+ while (Curr->Val != I || Curr->BB != BB) {
Prev = Curr;
Curr = Curr->Next;
}
-
+
if (Prev) {
Prev->Next = Curr->Next;
} else {
@@ -591,7 +597,7 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
}
-
+
// Helper fuctions
// FIXME: eliminate or document these better
@@ -602,13 +608,13 @@ namespace {
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
- Value *findLeader(BasicBlock *BB, uint32_t num);
+ Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root);
- bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
+ const BasicBlock *Root);
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root);
};
char GVN::ID = 0;
@@ -647,7 +653,11 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
/// 3) we are speculating for this block and have used that to speculate for
/// other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
- DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
+ uint32_t RecurseDepth) {
+ if (RecurseDepth > MaxRecurseDepth)
+ return false;
+
// Optimistically assume that the block is fully available and check to see
// if we already know about this block in one lookup.
std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
@@ -673,7 +683,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
// If the value isn't fully available in one of our predecessors, then it
// isn't fully available in this block either. Undo our previous
// optimistic assumption and bail out.
- if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
goto SpeculationFailure;
return true;
@@ -725,15 +735,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
StoredVal->getType()->isStructTy() ||
StoredVal->getType()->isArrayTy())
return false;
-
+
// The store has to be at least as big as the load.
if (TD.getTypeSizeInBits(StoredVal->getType()) <
TD.getTypeSizeInBits(LoadTy))
return false;
-
+
return true;
}
-
+
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
@@ -741,80 +751,80 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
-
+
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
-
+
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
-
+
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
-
+
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
-
+
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
-
+
// Cast to pointer if the load needs a pointer type.
if (LoadedTy->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
-
+
return StoredVal;
}
-
+
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
-
+
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
-
+
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
-
+
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
-
+
if (LoadedTy == NewIntTy)
return StoredVal;
-
+
// If the result is a pointer, inttoptr.
if (LoadedTy->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
-
+
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
@@ -835,13 +845,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
-
+
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
-
+
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens. One case is forwarding a memset
@@ -856,18 +866,18 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
abort();
}
#endif
-
+
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
-
+
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
-
-
+
+
bool isAAFailure = false;
if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
@@ -885,7 +895,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
#endif
return -1;
}
-
+
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
@@ -893,11 +903,11 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
-
+
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
-}
+}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
@@ -923,23 +933,23 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
-
+
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
if (R != -1) return R;
-
+
// If we have a load/load clobber an DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
-
+
unsigned Size = MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
if (Size == 0) return -1;
-
+
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}
@@ -958,29 +968,29 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, TD);
-
+
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
+
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
-
+
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
-
+
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
MI->getDest(), MemSizeInBits, TD);
if (Offset == -1)
return Offset;
-
+
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
- Constant *OffsetCst =
+ Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
@@ -988,7 +998,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
return Offset;
return -1;
}
-
+
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
@@ -999,32 +1009,32 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
-
+
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
-
+
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
-
+
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
-
+
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (TD.isLittleEndian())
ShiftAmt = Offset*8;
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-
+
if (ShiftAmt)
SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
-
+
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
-
+
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
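
The shift-and-truncate sequence built above is easier to read on ordinary integers. A minimal sketch, assuming the stored value fits in 64 bits (names are made up for illustration, not part of GVN):

#include <cstdint>

// Extract the LoadSize-byte value that starts Offset bytes into a
// StoreSize-byte stored integer, honouring endianness the same way the
// IRBuilder sequence above does.
static uint64_t extractBytes(uint64_t StoredBits, unsigned StoreSize,
                             unsigned LoadSize, unsigned Offset,
                             bool LittleEndian) {
  unsigned ShiftAmt = LittleEndian ? Offset * 8
                                   : (StoreSize - LoadSize - Offset) * 8;
  uint64_t V = StoredBits >> ShiftAmt;
  if (LoadSize < 8)                        // truncate to LoadSize bytes
    V &= (uint64_t(1) << (LoadSize * 8)) - 1;
  return V;
}
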
@@ -1051,14 +1061,14 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
NewLoadSize = NextPowerOf2(NewLoadSize);
Value *PtrVal = SrcVal->getPointerOperand();
-
+
// Insert the new load after the old load. This ensures that subsequent
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy =
+ Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
- DestPTy = PointerType::get(DestPTy,
+ DestPTy = PointerType::get(DestPTy,
cast<PointerType>(PtrVal->getType())->getAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
@@ -1068,7 +1078,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
+
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
@@ -1077,7 +1087,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
RV = Builder.CreateTrunc(RV, SrcVal->getType());
SrcVal->replaceAllUsesWith(RV);
-
+
// We would like to use gvn.markInstructionForDeletion here, but we can't
// because the load is already memoized into the leader map table that GVN
// tracks. It is potentially possible to remove the load from the table,
@@ -1086,7 +1096,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
gvn.getMemDep().removeInstruction(SrcVal);
SrcVal = NewLoad;
}
-
+
return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
}
@@ -1100,7 +1110,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
-
+
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
@@ -1109,9 +1119,9 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Value *Val = MSI->getValue();
if (LoadSize != 1)
Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
-
+
Value *OneElt = Val;
-
+
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
// If we can double the number of bytes set, do it.
@@ -1121,16 +1131,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
NumBytesSet <<= 1;
continue;
}
-
+
// Otherwise insert one byte at a time.
Value *ShVal = Builder.CreateShl(Val, 1*8);
Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
-
+
return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
}
-
+
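
The splat loop above is the usual double-when-you-can trick. A rough standalone equivalent, assuming the load is at most 8 bytes wide (helper name is made up):

#include <cstdint>

// Replicate a single memset byte across LoadSize bytes, doubling the filled
// prefix whenever that still fits, otherwise appending one byte at a time.
static uint64_t splatByte(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
    if (NumBytesSet * 2 <= LoadSize) {
      Val = (Val << (NumBytesSet * 8)) | Val;   // double the set bytes
      NumBytesSet <<= 1;
      continue;
    }
    Val = (Val << 8) | Byte;                    // add one more byte
    ++NumBytesSet;
  }
  return Val;
}
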
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
@@ -1139,7 +1149,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
- Constant *OffsetCst =
+ Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
@@ -1156,13 +1166,13 @@ struct AvailableValueInBlock {
LoadVal, // A value produced by a load.
MemIntrin // A memory intrinsic which is loaded from.
};
-
+
/// V - The value that is live out of the block.
PointerIntPair<Value *, 2, ValType> Val;
-
+
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
-
+
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
@@ -1182,7 +1192,7 @@ struct AvailableValueInBlock {
Res.Offset = Offset;
return Res;
}
-
+
static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
unsigned Offset = 0) {
AvailableValueInBlock Res;
@@ -1201,17 +1211,17 @@ struct AvailableValueInBlock {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
-
+
LoadInst *getCoercedLoadValue() const {
assert(isCoercedLoadValue() && "Wrong accessor");
return cast<LoadInst>(Val.getPointer());
}
-
+
MemIntrinsic *getMemIntrinValue() const {
assert(isMemIntrinValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
-
+
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
@@ -1223,7 +1233,7 @@ struct AvailableValueInBlock {
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
-
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1235,7 +1245,7 @@ struct AvailableValueInBlock {
} else {
Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
gvn);
-
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1258,12 +1268,12 @@ struct AvailableValueInBlock {
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
-static Value *ConstructSSAForLoadSet(LoadInst *LI,
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
GVN &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
- if (ValuesPerBlock.size() == 1 &&
+ if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent()))
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
@@ -1272,29 +1282,29 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
-
+
Type *LoadTy = LI->getType();
-
+
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
-
+
if (SSAUpdate.HasValueForBlock(BB))
continue;
SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
}
-
+
// Perform PHI construction.
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
-
+
// If new PHI nodes were created, notify alias analysis.
if (V->getType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
-
+
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
-
+
// Now that we've copied information to the new PHIs, scan through
// them again and inform alias analysis that we've added potentially
// escaping uses to any values that are operands to these PHIs.
@@ -1366,7 +1376,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
-
+
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
@@ -1382,7 +1392,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
}
}
-
+
// Check to see if we have something like this:
// load i32* P
// load i8* (P+1)
@@ -1394,7 +1404,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
LI->getPointerOperand(),
DepLI, *TD);
-
+
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
Offset));
@@ -1413,10 +1423,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
continue;
- }
+ }
}
}
-
+
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1426,14 +1436,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
UndefValue::get(LI->getType())));
continue;
}
-
+
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
@@ -1451,7 +1461,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
S->getValueOperand()));
continue;
}
-
+
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
@@ -1460,12 +1470,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
UnavailableBlocks.push_back(DepBB);
continue;
- }
+ }
}
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
continue;
}
-
+
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1479,7 +1489,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
+
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
@@ -1522,10 +1532,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
if (Blockers.count(TmpBB))
return false;
-
+
// If any of these blocks has more than one successor (i.e. if the edge we
- // just traversed was critical), then there are other paths through this
- // block along which the load may not be anticipated. Hoisting the load
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
// above this block would be adding the load to execution paths along
// which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
@@ -1570,7 +1580,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
- if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
PredLoads[Pred] = 0;
@@ -1603,7 +1613,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
-
+
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available LI and insert a new load into
@@ -1680,10 +1690,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
DEBUG(if (!NewInsts.empty())
dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
<< *NewInsts.back() << '\n');
-
+
// Assign value numbers to the new instructions.
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
- // FIXME: We really _ought_ to insert these value numbers into their
+ // FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
@@ -1725,6 +1735,53 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
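
The hunks above sit inside GVN's partial redundancy elimination for loads: when the loaded value is available in every predecessor but one, a copy of the load is inserted into the missing predecessor and the incoming values are merged with a PHI. A source-level caricature of that rewrite (illustrative only, not taken from the commit or the test suite):

int before(int *p, bool c) {
  int a = 0;
  if (c)
    a = *p + 1;      // *p is loaded on this path...
  return a + *p;     // ...so this load is redundant whenever c was true
}

int after(int *p, bool c) {
  int a = 0, v;
  if (c) {
    v = *p;          // the value was already available here
    a = v + 1;
  } else {
    v = *p;          // PRE inserts the load only on the path that lacked it
  }
  return a + v;      // the join uses the merged value; no second load
}
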
+static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
+ BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
+ if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
+ isa<OverflowingBinaryOperator>(ReplOp)) {
+ if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
+ ReplOp->setHasNoSignedWrap(false);
+ if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
+ ReplOp->setHasNoUnsignedWrap(false);
+ }
+ if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
+ SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
+ ReplInst->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (int i = 0, n = Metadata.size(); i < n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *IMD = I->getMetadata(Kind);
+ MDNode *ReplMD = Metadata[i].second;
+ switch(Kind) {
+ default:
+ ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_dbg:
+ llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
+ case LLVMContext::MD_tbaa:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD));
+ break;
+ case LLVMContext::MD_range:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
+ break;
+ case LLVMContext::MD_prof:
+ llvm_unreachable("MD_prof in a non terminator instruction");
+ break;
+ case LLVMContext::MD_fpmath:
+ ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
+ break;
+ }
+ }
+ }
+}
+
+static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
+ patchReplacementInstruction(Repl, I);
+ I->replaceAllUsesWith(Repl);
+}
+
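
The two helpers added above enforce a conservative rule: the replacement may not advertise guarantees that the replaced instruction lacked, so nsw/nuw flags survive only if both instructions carried them, and metadata is intersected (TBAA, range and fpmath through their most-generic forms, unknown kinds dropped). A toy sketch of the flag half of that rule, with made-up names:

struct WrapFlags { bool NSW, NUW; };   // stand-ins for nsw/nuw on an add

// Keep a no-wrap guarantee only if both the original instruction and its
// replacement had it; otherwise the merged result must drop it.
static WrapFlags mergeForReplacement(WrapFlags Repl, WrapFlags Orig) {
  WrapFlags Out;
  Out.NSW = Repl.NSW && Orig.NSW;
  Out.NUW = Repl.NUW && Orig.NUW;
  return Out;
}
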
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L) {
@@ -1738,7 +1795,7 @@ bool GVN::processLoad(LoadInst *L) {
markInstructionForDeletion(L);
return true;
}
-
+
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
@@ -1764,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) {
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
L->getType(), L, *TD);
}
-
+
// Check to see if we have something like this:
// load i32* P
// load i8* (P+1)
@@ -1774,14 +1831,14 @@ bool GVN::processLoad(LoadInst *L) {
// we have the first instruction in the entry block.
if (DepLI == L)
return false;
-
+
int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
L->getPointerOperand(),
DepLI, *TD);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
-
+
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
@@ -1791,11 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
if (Offset != -1)
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
}
-
+
if (AvailVal) {
DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
-
+
// Replace the load!
L->replaceAllUsesWith(AvailVal);
if (AvailVal->getType()->isPointerTy())
@@ -1805,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L) {
return true;
}
}
-
+
// If the value isn't available, don't do anything!
if (Dep.isClobber()) {
DEBUG(
@@ -1835,7 +1892,7 @@ bool GVN::processLoad(LoadInst *L) {
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
-
+
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
@@ -1845,11 +1902,11 @@ bool GVN::processLoad(LoadInst *L) {
L, *TD);
if (StoredVal == 0)
return false;
-
+
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
- else
+ else
return false;
}
@@ -1864,7 +1921,7 @@ bool GVN::processLoad(LoadInst *L) {
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
Value *AvailableVal = DepLI;
-
+
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
@@ -1874,16 +1931,16 @@ bool GVN::processLoad(LoadInst *L) {
L, *TD);
if (AvailableVal == 0)
return false;
-
+
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
- else
+ else
return false;
}
-
+
// Remove it!
- L->replaceAllUsesWith(AvailableVal);
+ patchAndReplaceAllUsesWith(AvailableVal, L);
if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -1894,13 +1951,13 @@ bool GVN::processLoad(LoadInst *L) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
-
+
// If this load occurs either right after a lifetime begin,
// then the loaded value is undefined.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
@@ -1915,28 +1972,28 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-// findLeader - In order to find a leader for a given value number at a
+// findLeader - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
-// and then scan the list to find one whose block dominates the block in
+// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return 0;
-
+
Value *Val = 0;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
}
-
+
LeaderTableEntry* Next = Vals.Next;
while (Next) {
if (DT->dominates(Next->BB, BB)) {
if (isa<Constant>(Next->Val)) return Next->Val;
if (!Val) Val = Next->Val;
}
-
+
Next = Next->Next;
}
@@ -1947,7 +2004,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// use is dominated by the given basic block. Returns the number of uses that
/// were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root) {
+ const BasicBlock *Root) {
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
@@ -1973,7 +2030,7 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
/// propagateEquality - The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2012,9 +2069,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
"Instruction doesn't dominate scope!");
- // If value numbering later deduces that an instruction in the scope is equal
- // to 'LHS' then ensure it will be turned into 'RHS'.
- addToLeaderTable(LVN, RHS, Root);
+ // If value numbering later sees that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
+ // the invariant that instructions only occur in the leader table for their
+ // own value number (this is used by removeFromLeaderTable), do not do this
+ // if RHS is an instruction (if an instruction in the scope is morphed into
+ // LHS then it will be turned into RHS by the next GVN iteration anyway, so
+ // using the leader table is about compiling faster, not optimizing better).
+ if (!isa<Instruction>(RHS))
+ addToLeaderTable(LVN, RHS, Root);
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2180,7 +2243,7 @@ bool GVN::processInstruction(Instruction *I) {
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
if (I->getType()->isVoidTy()) return false;
-
+
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
@@ -2198,7 +2261,7 @@ bool GVN::processInstruction(Instruction *I) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
-
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
@@ -2207,9 +2270,9 @@ bool GVN::processInstruction(Instruction *I) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
-
+
// Remove it!
- I->replaceAllUsesWith(repl);
+ patchAndReplaceAllUsesWith(repl, I);
if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
@@ -2234,7 +2297,7 @@ bool GVN::runOnFunction(Function& F) {
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = FI++;
-
+
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
@@ -2391,7 +2454,7 @@ bool GVN::performPRE(Function &F) {
// we would need to insert instructions in more than one pred.
if (NumWithout != 1 || NumWith == 0)
continue;
-
+
// Don't do PRE across indirect branch.
if (isa<IndirectBrInst>(PREPred->getTerminator()))
continue;
@@ -2467,7 +2530,7 @@ bool GVN::performPRE(Function &F) {
unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
}
-
+
if (MD)
MD->invalidateCachedPointerInfo(Phi);
}
@@ -2504,7 +2567,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
-
+
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@@ -2539,7 +2602,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
const LeaderTableEntry *Node = &I->second;
assert(Node->Val != Inst && "Inst still in value numbering scope!");
-
+
while (Node->Next) {
Node = Node->Next;
assert(Node->Val != Inst && "Inst still in value numbering scope!");
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index c2bd6e6..b36a3cb 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -12,7 +12,7 @@
// global). Such a transformation can significantly reduce the register pressure
// when many globals are involved.
//
-// For example, consider the code which touches several global variables at
+// For example, consider the code which touches several global variables at
// once:
//
// static int foo[N], bar[N], baz[N];
@@ -208,8 +208,8 @@ bool GlobalMerge::doInitialization(Module &M) {
if (BSSGlobals.size() > 1)
Changed |= doMerge(BSSGlobals, M, false);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
// them in the future.
// if (ConstGlobals.size() > 1)
// Changed |= doMerge(ConstGlobals, M, true);
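
For context, the transformation the pass performs can be pictured at source level (array sizes and the struct name below are illustrative; the pass itself rewrites IR globals):

static int foo[128], bar[128], baz[128];   // before: three separate globals

// Conceptually, after merging:
static struct MergedGlobals_t {
  int foo[128];
  int bar[128];
  int baz[128];
} MergedGlobals;
// A loop that touches foo[i], bar[i] and baz[i] now needs a single base
// register instead of one per global.
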
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index a9ba657..37f8bdf 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1215,21 +1215,26 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
return 0;
}
-/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
-/// that the current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L, DominatorTree *DT) {
+/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
+static ICmpInst *getLoopTest(Loop *L) {
assert(L->getExitingBlock() && "expected loop exit");
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return false;
+ return 0;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
+ return dyn_cast<ICmpInst>(BI->getCondition());
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR to simplify the exit condition to an ICMP.
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ ICmpInst *Cond = getLoopTest(L);
if (!Cond)
return true;
@@ -1259,6 +1264,48 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
return Phi != getLoopPhiForCounter(IncV, L, DT);
}
+/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
+/// down to checking that all operands are constant and listing instructions
+/// that may hide undef.
+static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
+ unsigned Depth) {
+ if (isa<Constant>(V))
+ return !isa<UndefValue>(V);
+
+ if (Depth >= 6)
+ return false;
+
+ // Conservatively handle non-constant non-instructions. For example, Arguments
+ // may be undef.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // Load and return values may be undef.
+ if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
+ return false;
+
+ // Optimistically handle other instructions.
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ if (!Visited.insert(*OI))
+ continue;
+ if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ return false;
+ }
+ return true;
+}
+
+/// Return true if the given value is concrete. We must prove that undef can
+/// never reach it.
+///
+/// TODO: If we decide that this is a good approach to checking for undef, we
+/// may factor it into a common location.
+static bool hasConcreteDef(Value *V) {
+ SmallPtrSet<Value*, 8> Visited;
+ Visited.insert(V);
+ return hasConcreteDefImpl(V, Visited, 0);
+}
+
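
The hazard hasConcreteDef guards against can be pictured with a deliberately sloppy source example (made up for illustration): rewriting the exit test in terms of a counter whose start is undef could make a previously well-defined comparison observe undef.

void example(int n) {
  int j;                          // intentionally uninitialized: starts undef
  for (int i = 0; i != n; ++i)
    ++j;                          // j counts in lockstep with i, but from an
                                  // unknown starting value
  // LFTR must not rewrite the exit test "i != n" in terms of j unless the
  // existing loop test already uses j; that is the exception added to
  // FindLoopCounter further below.
}
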
/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
/// be rewritten) loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
@@ -1283,6 +1330,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
+/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
+///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
@@ -1331,6 +1380,19 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
if (getLoopPhiForCounter(IncV, L, DT) != Phi)
continue;
+ // Avoid reusing a potentially undef value to compute other values that may
+ // have originally had a concrete definition.
+ if (!hasConcreteDef(Phi)) {
+ // We explicitly allow unknown phis as long as they are already used by
+ // the loop test. In this case we assume that performing LFTR could not
+ // increase the number of undef users.
+ if (ICmpInst *Cond = getLoopTest(L)) {
+ if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
+ && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
+ continue;
+ }
+ }
+ }
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -1347,7 +1409,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// If two IVs both count from zero or both count from nonzero then the
// narrower is likely a dead phi that has been widened. Use the wider phi
// to allow the other to be eliminated.
- if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
continue;
}
BestPhi = Phi;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 429b61b..dd42c59 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -670,6 +670,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ // Can't thread indirect branch with no successors.
+ if (IB->getNumSuccessors() == 0) return false;
Condition = IB->getAddress()->stripPointerCasts();
Preference = WantBlockAddress;
} else {
@@ -859,7 +861,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If all of the loads and stores that feed the value have the same TBAA tag,
// then we can propagate it onto any newly inserted loads.
- MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -885,7 +887,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
OneUnavailablePred = PredBB;
continue;
}
-
+
// If tbaa tags disagree or are not present, forget about them.
if (TBAATag != ThisTBAATag) TBAATag = 0;
@@ -949,7 +951,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
NewVal->setDebugLoc(LI->getDebugLoc());
if (TBAATag)
NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8795cd8..0192e92 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -175,7 +175,9 @@ namespace {
bool canSinkOrHoistInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- void PromoteAliasSet(AliasSet &AS);
+ void PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts);
};
}
@@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts);
}
// Clear out loops state information for the next iteration
@@ -618,6 +623,11 @@ bool LICM::isGuaranteedToExecute(Instruction &Inst) {
if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
return false;
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
return true;
}
@@ -626,6 +636,7 @@ namespace {
Value *SomePtr; // Designated pointer to store to.
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ SmallVectorImpl<Instruction*> &LoopInsertPts;
AliasSetTracker &AST;
DebugLoc DL;
int Alignment;
@@ -633,11 +644,12 @@ namespace {
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl, int alignment)
+ SmallVectorImpl<BasicBlock*> &LEB,
+ SmallVectorImpl<Instruction*> &LIP,
+ AliasSetTracker &ast, DebugLoc dl, int alignment)
: LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
- Alignment(alignment) {}
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
+ AST(ast), DL(dl), Alignment(alignment) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -657,7 +669,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
+ Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -679,7 +691,9 @@ namespace {
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS) {
+void LICM::PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -789,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.resize(ExitBlocks.size());
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ }
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL, Alignment);
+ InsertPts, *CurAST, DL, Alignment);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
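
The plumbing above feeds scalar promotion: once every access in the alias set must-aliases a loop-invariant pointer and the accesses are known to execute, the location is kept in a register inside the loop and the store is sunk to the precomputed insertion point of each exit block. A rough source-level picture (the legality conditions, including the new empty-ExitBlocks check, are exactly what makes this safe; names are illustrative):

void before(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p += i;                 // load and store of *p on every iteration
}

// Only valid when the access is guaranteed to execute in the loop, which is
// what LICM proves before promoting.
void after(int *p, int n) {
  int tmp = *p;              // load once, in the preheader
  for (int i = 0; i < n; ++i)
    tmp += i;                // loop body works on a register
  *p = tmp;                  // store once, at the loop exit
}
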
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index f7f3298..3771f5a 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -32,10 +32,10 @@ namespace {
LoopDeletion() : LoopPass(ID) {
initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
}
-
+
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
-
+
bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader);
@@ -46,7 +46,7 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
-
+
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<LoopInfo>();
@@ -55,7 +55,7 @@ namespace {
}
};
}
-
+
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
@@ -79,7 +79,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader) {
BasicBlock* exitBlock = exitBlocks[0];
-
+
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
// must pass through a PHI in the exit block, meaning that this check is
@@ -97,14 +97,14 @@ bool LoopDeletion::IsLoopDead(Loop* L,
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
-
+
if (Instruction* I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
++BI;
}
-
+
// Make sure that no instructions in the block have potential side-effects.
// This includes instructions that could write to memory, and loads that are
// marked volatile. This could be made more aggressive by using aliasing
@@ -117,23 +117,23 @@ bool LoopDeletion::IsLoopDead(Loop* L,
return false;
}
}
-
+
return true;
}
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
-/// observable behavior of the program other than finite running time. Note
+/// observable behavior of the program other than finite running time. Note
/// we do ensure that this never remove a loop that might be infinite, as doing
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
- // We can only remove the loop if there is a preheader that we can
+ // We can only remove the loop if there is a preheader that we can
// branch from after removing it.
BasicBlock* preheader = L->getLoopPreheader();
if (!preheader)
return false;
-
+
// If LoopSimplify form is not available, stay out of trouble.
if (!L->hasDedicatedExits())
return false;
@@ -142,36 +142,36 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
return false;
-
+
SmallVector<BasicBlock*, 4> exitingBlocks;
L->getExitingBlocks(exitingBlocks);
-
+
SmallVector<BasicBlock*, 4> exitBlocks;
L->getUniqueExitBlocks(exitBlocks);
-
+
// We require that the loop only have a single exit block. Otherwise, we'd
// be in the situation of needing to be able to solve statically which exit
// block will be branched to, or trying to preserve the branching logic in
// a loop invariant manner.
if (exitBlocks.size() != 1)
return false;
-
+
// Finally, we have to check that the loop really is dead.
bool Changed = false;
if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
-
+
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
-
+
// Now that we know the removal is safe, remove the loop by changing the
- // branch from the preheader to go to the single exit block.
+ // branch from the preheader to go to the single exit block.
BasicBlock* exitBlock = exitBlocks[0];
-
+
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
// mess with this unless you have good reason and know what you're doing.
@@ -197,7 +197,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
P->removeIncomingValue(exitingBlocks[i]);
++BI;
}
-
+
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
DominatorTree& DT = getAnalysis<DominatorTree>();
@@ -211,7 +211,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
}
-
+
ChildNodes.clear();
DT.eraseNode(*LI);
@@ -219,7 +219,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// delete it freely later.
(*LI)->dropAllReferences();
}
-
+
// Erase the instructions and the blocks without having to worry
// about ordering because we already dropped the references.
// NOTE: This iteration is safe because erasing the block does not remove its
@@ -236,13 +236,13 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
E = blocks.end(); I != E; ++I)
loopInfo.removeBlock(*I);
-
+
// The last step is to inform the loop pass manager that we've
// eliminated this loop.
LPM.deleteLoopFromQueue(L);
Changed = true;
-
+
++NumDeleted;
-
+
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ad15cbb..ac1082c 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -43,20 +43,20 @@
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
@@ -173,7 +173,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
- // Disable loop idiom recognition if the function's name is a common idiom.
+ // Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy")
return false;
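
The function-name check above exists because the recognized loop may be rewritten into a libcall, and doing that inside the libcall's own definition would make it call itself. A self-contained illustration (not taken from the LLVM tests):

void *memset(void *dst, int c, unsigned long n) {
  unsigned char *p = (unsigned char *)dst;
  for (unsigned long i = 0; i != n; ++i)
    p[i] = (unsigned char)c;   // this loop is exactly the memset idiom, so
                               // turning it into a call to memset here would
                               // recurse forever
  return dst;
}
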
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index f0f05e6..982400c 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -48,7 +48,7 @@ namespace {
}
};
}
-
+
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 59aace9..7eeb152 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -418,12 +418,13 @@ bool LoopRotate::rotateLoop(Loop *L) {
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
- // thus is not a preheader anymore. Split the edge to form a real preheader.
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
NewPH->setName(NewHeader->getName() + ".lr.ph");
- // Preserve canonical loop form, which means that 'Exit' should have only one
- // predecessor.
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor.
BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
ExitSplit->moveBefore(Exit);
} else {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b085b00..b14a713 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1308,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
@@ -1439,7 +1439,41 @@ struct IVInc {
// IVChain - The list of IV increments in program order.
// We typically add the head of a chain without finding subsequent links.
-typedef SmallVector<IVInc,1> IVChain;
+struct IVChain {
+ SmallVector<IVInc,1> Incs;
+ const SCEV *ExprBase;
+
+ IVChain() : ExprBase(0) {}
+
+ IVChain(const IVInc &Head, const SCEV *Base)
+ : Incs(1, Head), ExprBase(Base) {}
+
+ typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+
+ // begin - return the first increment in the chain.
+ const_iterator begin() const {
+ assert(!Incs.empty());
+ return llvm::next(Incs.begin());
+ }
+ const_iterator end() const {
+ return Incs.end();
+ }
+
+ // hasIncs - Returns true if this chain contains any increments.
+ bool hasIncs() const { return Incs.size() >= 2; }
+
+ // add - Add an IVInc to the end of this chain.
+ void add(const IVInc &X) { Incs.push_back(X); }
+
+ // tailUserInst - Returns the last UserInst in the chain.
+ Instruction *tailUserInst() const { return Incs.back().UserInst; }
+
+ // isProfitableIncrement - Returns true if IncExpr can be profitably added to
+ // this chain.
+ bool isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution&);
+};
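
An IV chain, as the new struct above models it, is a head user followed by users whose IV operands differ from the previous one by a loop-invariant, ideally constant, increment; later members can then be computed as cheap adds off the head instead of as independent address computations. In source terms (illustrative only):

void chainExample(char *p, long n, long stride) {
  for (long i = 0; i < n; ++i) {
    p[i * stride]     = 0;     // head of the chain
    p[i * stride + 4] = 0;     // +4 bytes from the previous user
    p[i * stride + 8] = 0;     // +4 again: cheap constant increments
  }
}
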
/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
/// Distinguish between FarUsers that definitely cross IV increments and
@@ -2160,7 +2194,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
- // can skip the rest of the formulae and procede to the next LSRUse.
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
@@ -2319,41 +2353,23 @@ static const SCEV *getExprBase(const SCEV *S) {
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as chain increment as long as it's not obviously
/// expensive to expand using real instructions.
-static const SCEV *
-getProfitableChainIncrement(Value *NextIV, Value *PrevIV,
- const IVChain &Chain, Loop *L,
- ScalarEvolution &SE, const TargetLowering *TLI) {
- // Prune the solution space aggressively by checking that both IV operands
- // are expressions that operate on the same unscaled SCEVUnknown. This
- // "base" will be canceled by the subsequent getMinusSCEV call. Checking first
- // avoids creating extra SCEV expressions.
- const SCEV *OperExpr = SE.getSCEV(NextIV);
- const SCEV *PrevExpr = SE.getSCEV(PrevIV);
- if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain)
- return 0;
-
- const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
- if (!SE.isLoopInvariant(IncExpr, L))
- return 0;
-
- // We are not able to expand an increment unless it is loop invariant,
- // however, the following checks are purely for profitability.
+bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution &SE) {
+ // Aggressively form chains when -stress-ivchain.
if (StressIVChain)
- return IncExpr;
+ return true;
// Do not replace a constant offset from IV head with a nonconstant IV
// increment.
if (!isa<SCEVConstant>(IncExpr)) {
- const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand));
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
return 0;
}
SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(IncExpr, Processed, SE))
- return 0;
-
- return IncExpr;
+ return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Return true if the number of registers needed for the chain is estimated to
@@ -2372,18 +2388,18 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
if (StressIVChain)
return true;
- if (Chain.size() <= 2)
+ if (!Chain.hasIncs())
return false;
if (!Users.empty()) {
- DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n";
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
E = Users.end(); I != E; ++I) {
dbgs() << " " << **I << "\n";
});
return false;
}
- assert(!Chain.empty() && "empty IV chains are not allowed");
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  // The chain itself may require a register, so initialize cost to 1.
int cost = 1;
@@ -2391,15 +2407,15 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
// A complete chain likely eliminates the need for keeping the original IV in
// a register. LSR does not currently know how to form a complete chain unless
// the header phi already exists.
- if (isa<PHINode>(Chain.back().UserInst)
- && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) {
+ if (isa<PHINode>(Chain.tailUserInst())
+ && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
const SCEV *LastIncExpr = 0;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
- for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
if (I->IncExpr->isZero())
@@ -2435,7 +2451,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
// the stride.
cost -= NumReusedIncrements;
- DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n");
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+ << "\n");
return cost < 0;
}
@@ -2446,25 +2463,39 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
// to a wider type with some uses remaining narrow under a (free) trunc.
- Value *NextIV = getWideOperand(IVOper);
+ Value *const NextIV = getWideOperand(IVOper);
+ const SCEV *const OperExpr = SE.getSCEV(NextIV);
+ const SCEV *const OperExprBase = getExprBase(OperExpr);
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
const SCEV *LastIncExpr = 0;
for (; ChainIdx < NChains; ++ChainIdx) {
- Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand);
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+ // first avoids creating extra SCEV expressions.
+ if (!StressIVChain && Chain.ExprBase != OperExprBase)
+ continue;
+
+ Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
// A phi node terminates a chain.
- if (isa<PHINode>(UserInst)
- && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
+ if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+ continue;
+
+ // The increment must be loop-invariant so it can be kept in a register.
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
continue;
- if (const SCEV *IncExpr =
- getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx],
- L, SE, TLI)) {
+ if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
LastIncExpr = IncExpr;
break;
}
@@ -2478,24 +2509,24 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
- LastIncExpr = SE.getSCEV(NextIV);
+ LastIncExpr = OperExpr;
// IVUsers may have skipped over sign/zero extensions. We don't currently
// attempt to form chains involving extensions unless they can be hoisted
// into this loop's AddRec.
if (!isa<SCEVAddRecExpr>(LastIncExpr))
return;
++NChains;
- IVChainVec.resize(NChains);
+ IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+ OperExprBase));
ChainUsersVec.resize(NChains);
- DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
- << "\n");
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
+ } else {
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
- else
- DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr
- << "\n");
-
- // Add this IV user to the end of the chain.
- IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr));
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
@@ -2551,6 +2582,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
+ DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
@@ -2622,10 +2654,10 @@ void LSRInstance::CollectChains() {
}
void LSRInstance::FinalizeChain(IVChain &Chain) {
- assert(!Chain.empty() && "empty IV chains are not allowed");
- DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n");
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
- for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
User::op_iterator UseI =
@@ -2659,7 +2691,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
- const IVInc &Head = Chain[0];
+ const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
@@ -2691,7 +2723,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = 0;
- for (IVChain::const_iterator IncI = llvm::next(Chain.begin()),
+ for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
Instruction *InsertPt = IncI->UserInst;
@@ -2736,7 +2768,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
- if (isa<PHINode>(Chain.back().UserInst)) {
+ if (isa<PHINode>(Chain.tailUserInst())) {
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
if (!isCompatibleIVType(Phi, IVSrc))
@@ -2804,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2974,42 +3006,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
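
The rewritten CollectSubexprs above now returns a remainder instead of unconditionally pushing into Ops, and arbitrarily caps recursion at depth 3 to bound compile time. The following standalone sketch (a toy expression tree, not the SCEV classes) is only meant to illustrate that shape under those assumptions; as in the diff, the caller pushes any non-null remainder onto its own operand list.

#include <vector>

// Toy node: either a leaf or a sum of children (illustrative only).
struct ToyExpr {
  std::vector<ToyExpr *> Adds; // non-empty => this node is an add
};

// Collect splittable subexpressions into Ops. Returns nullptr when S was
// fully decomposed, or the part that must stay together (here, anything
// below the arbitrary depth cap that mirrors the compile-time guard).
static ToyExpr *collectSubexprs(ToyExpr *S, std::vector<ToyExpr *> &Ops,
                                unsigned Depth = 0) {
  if (Depth >= 3)
    return S;                       // too deep: hand the whole thing back
  if (S->Adds.empty())
    return S;                       // a leaf is itself the remainder
  for (ToyExpr *Op : S->Adds)
    if (ToyExpr *Remainder = collectSubexprs(Op, Ops, Depth + 1))
      Ops.push_back(Remainder);     // caller-visible subexpression
  return nullptr;                   // everything went into Ops
}
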
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -3024,7 +3078,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
@@ -4236,13 +4292,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
@@ -4264,23 +4313,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
-
- // Flush the operand list to suppress SCEVExpander hoisting.
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
@@ -4485,7 +4545,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// Mark phi nodes that terminate chains so the expander tries to reuse them.
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- if (PHINode *PN = dyn_cast<PHINode>(ChainI->back().UserInst))
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
Rewriter.setChainedPhi(PN);
}
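
Several LoopStrengthReduce hunks above switch from indexing the chain directly (Chain[0], Chain.back().UserInst) to an IVChain struct exposing an Incs vector and a tailUserInst() accessor. A minimal sketch of that shape follows; the field and method names mirror the diff, but the bodies are illustrative, not the LLVM definition.

#include <cassert>
#include <vector>

struct IVIncSketch {
  void *UserInst; // stands in for Instruction*
};

struct IVChainSketch {
  std::vector<IVIncSketch> Incs;

  // Named accessor for the last user in the chain, asserting non-emptiness
  // in one place instead of at every call site.
  void *tailUserInst() const {
    assert(!Incs.empty() && "empty IV chains are not allowed");
    return Incs.back().UserInst;
  }
};
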
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 930980f..58f7739 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1214,8 +1214,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
- // 'false'.
- if (Value *V = SimplifyInstruction(I, 0, 0, DT))
+ // 'false'. TODO: update the domtree properly so we can pass it here.
+ if (Value *V = SimplifyInstruction(I))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
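
For context on the comment in the hunk above: once unswitching specializes a loop body for a known condition, instructions such as "select false, X, Y" become trivially foldable. A toy C++ analogue of that fold (purely illustrative):

// With Cond known to be false after unswitching, the whole expression
// folds to Y; SimplifyInstruction performs the IR-level equivalent.
static int selectFalseFoldsToY(bool Cond, int X, int Y) {
  return Cond ? X : Y;
}
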
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 689bbe9..7419a65 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -15,9 +15,9 @@
#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
@@ -25,12 +25,12 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
-
+
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
-
+
CXI->replaceAllUsesWith(Orig);
CXI->eraseFromParent();
return true;
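
The LowerAtomicCmpXchgInst body shown above expands a cmpxchg into a plain load/compare/select/store sequence. A self-contained sketch of the resulting (non-atomic) semantics, assuming single-threaded use:

// Non-atomic equivalent of the lowered cmpxchg: load the current value,
// compare it with the expected value, store the new value only on a match,
// and return what was originally loaded (the cmpxchg result).
template <typename T>
static T loweredCmpXchg(T *Ptr, T Cmp, T Val) {
  T Orig = *Ptr;              // LoadInst
  bool Equal = (Orig == Cmp); // ICmpEQ
  *Ptr = Equal ? Val : Orig;  // Select feeding the Store
  return Orig;
}
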
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a87cce3..2a5ee33 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -15,21 +15,21 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -44,7 +44,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
/*skip along*/;
-
+
// Compute the offset implied by the rest of the indices.
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
@@ -58,7 +58,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
-
+
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -77,7 +77,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
Ptr2 = Ptr2->stripPointerCasts();
GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
-
+
bool VariableIdxFound = false;
// If one pointer is a GEP and the other isn't, then see if the GEP is a
@@ -91,7 +91,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
-
+
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
@@ -99,7 +99,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// handle no other case.
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
-
+
// Skip any common indices and track the GEP types.
unsigned Idx = 1;
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
@@ -109,7 +109,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
-
+
Offset = Offset2-Offset1;
return true;
}
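
The two helpers above reduce a pair of GEPs with a common base to a constant byte offset, failing as soon as a variable index appears. A simplified standalone sketch of that accumulation, using toy index records in place of gep_type_iterator:

#include <cstdint>
#include <vector>

// Toy stand-in for one trailing GEP index: either a known byte contribution
// or a variable one that defeats the analysis.
struct ToyIndex {
  bool IsConstant;
  int64_t Bytes;
};

// Sum the trailing indices of both accesses; on success, Offset is the
// distance of B from A in bytes, mirroring "Offset = Offset2 - Offset1".
static bool toyPointerOffset(const std::vector<ToyIndex> &A,
                             const std::vector<ToyIndex> &B, int64_t &Offset) {
  int64_t OffA = 0, OffB = 0;
  for (const ToyIndex &I : A) {
    if (!I.IsConstant) return false;
    OffA += I.Bytes;
  }
  for (const ToyIndex &I : B) {
    if (!I.IsConstant) return false;
    OffB += I.Bytes;
  }
  Offset = OffB - OffA;
  return true;
}
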
@@ -128,19 +128,19 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
namespace {
struct MemsetRange {
// Start/End - A semi range that describes the span that this range covers.
- // The range is closed at the start and open at the end: [Start, End).
+ // The range is closed at the start and open at the end: [Start, End).
int64_t Start, End;
/// StartPtr - The getelementptr instruction that points to the start of the
/// range.
Value *StartPtr;
-
+
/// Alignment - The known alignment of the first store.
unsigned Alignment;
-
+
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
-
+
bool isProfitableToUseMemset(const TargetData &TD) const;
};
@@ -152,17 +152,17 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
-
+
// If any of the stores are a memset, then it is always good to extend the
// memset.
for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
if (!isa<StoreInst>(TheStores[i]))
return true;
-
+
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
if (TheStores.size() == 2) return false;
-
+
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
// However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
@@ -175,15 +175,15 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// actually reducing the number of stores used.
unsigned Bytes = unsigned(End-Start);
unsigned NumPointerStores = Bytes/TD.getPointerSize();
-
+
// Assume the remaining bytes if any are done a byte at a time.
unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
-
+
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// etc.
return TheStores.size() > NumPointerStores+NumByteStores;
-}
+}
namespace {
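
The heuristic above compares the number of stores being replaced with how many pointer-sized plus leftover byte stores the code generator would need anyway. A compact sketch of that arithmetic, omitting the memset-extension and two-store special cases handled earlier in the function:

// Returns true when one memset would replace more stores than the target
// could emit itself with pointer-width stores plus leftover byte stores.
static bool toyProfitableToUseMemset(unsigned NumStores, unsigned Bytes,
                                     unsigned PointerSize /* assumed, e.g. 8 */) {
  if (NumStores < 2)
    return false; // nothing to merge
  unsigned NumPointerStores = Bytes / PointerSize;
  unsigned NumByteStores = Bytes - NumPointerStores * PointerSize;
  return NumStores > NumPointerStores + NumByteStores;
}
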
@@ -195,12 +195,12 @@ class MemsetRanges {
const TargetData &TD;
public:
MemsetRanges(const TargetData &td) : TD(td) {}
-
+
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
-
+
void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
addStore(OffsetFromFirst, SI);
@@ -210,21 +210,21 @@ public:
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
+
addRange(OffsetFromFirst, StoreSize,
SI->getPointerOperand(), SI->getAlignment(), SI);
}
-
+
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
}
-
+
void addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst);
};
-
+
} // end anon namespace
@@ -240,10 +240,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
-
+
while (I != E && Start > I->End)
++I;
-
+
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
@@ -256,18 +256,18 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
R.TheStores.push_back(Inst);
return;
}
-
+
// This store overlaps with I, add it.
I->TheStores.push_back(Inst);
-
+
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
if (I->Start <= Start && I->End >= End)
return;
-
+
// Now we know that Start <= I->End and End >= I->Start so the range overlaps
// but is not entirely contained within the range.
-
+
// See if the range extends the start of the range. In this case, it couldn't
// possibly cause it to join the prior range, because otherwise we would have
// stopped on *it*.
@@ -276,7 +276,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
I->StartPtr = Ptr;
I->Alignment = Alignment;
}
-
+
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
// is in or right at the end of I), and that End >= I->Start. Extend I out to
// End.
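
addRange above keeps the ranges sorted and either inserts a new half-open interval or grows the one the incoming store overlaps. A compact sketch of that bookkeeping; it omits coalescing with later ranges, which the full code handles after extending the end.

#include <cstdint>
#include <list>

struct ToyRange {
  int64_t Start, End; // closed at Start, open at End: [Start, End)
};

static void toyAddRange(std::list<ToyRange> &Ranges, int64_t Start,
                        int64_t Size) {
  int64_t End = Start + Size;
  std::list<ToyRange>::iterator I = Ranges.begin(), E = Ranges.end();
  while (I != E && Start > I->End)
    ++I;                                    // skip ranges ending before us
  if (I == E || End < I->Start) {
    Ranges.insert(I, ToyRange{Start, End}); // disjoint: insert a new range
    return;
  }
  if (Start < I->Start)
    I->Start = Start;                       // extend the range to the left
  if (End > I->End)
    I->End = End;                           // extend the range to the right
}
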
@@ -325,7 +325,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
-
+
// Helper fuctions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
@@ -341,7 +341,7 @@ namespace {
bool iterateOnFunction(Function &F);
};
-
+
char MemCpyOpt::ID = 0;
}
@@ -361,16 +361,16 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
-Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
if (TD == 0) return 0;
-
+
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
MemsetRanges Ranges(*TD);
-
+
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
@@ -381,43 +381,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
break;
continue;
}
-
+
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
-
+
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
-
+
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
Offset, *TD))
break;
-
+
Ranges.addStore(Offset, NextStore);
} else {
MemSetInst *MSI = cast<MemSetInst>(BI);
-
+
if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
!isa<ConstantInt>(MSI->getLength()))
break;
-
+
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
break;
-
+
Ranges.addMemSet(Offset, MSI);
}
}
-
+
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
return 0;
-
+
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
@@ -434,28 +434,28 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
-
+
if (Range.TheStores.size() == 1) continue;
-
+
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(*TD))
continue;
-
+
// Otherwise, we do want to transform this! Create a new memset.
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
- Type *EltType =
+ Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
-
- AMemSet =
+
+ AMemSet =
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
-
+
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i] << '\n';
@@ -473,14 +473,14 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
}
++NumMemSetInfer;
}
-
+
return AMemSet;
}
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
-
+
if (TD == 0) return false;
// Detect cases where we're performing call slot forwarding, but
@@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
+ SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
if (changed) {
@@ -524,10 +524,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
}
}
-
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
-
+
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
@@ -537,7 +537,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
BBI = I; // Don't invalidate iterator.
return true;
}
-
+
return false;
}
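
The store-merging path above (tryMergingIntoMemset, reached from processStore) scans forward from a splattable store for neighboring stores of the same byte value and, when profitable, replaces the whole run with one memset. In C terms the pattern it targets looks roughly like this; the actual decision also depends on TargetData and the profitability heuristic shown earlier.

#include <string.h>

// Four adjacent byte stores of the same splattable value ...
void zero4_stores(unsigned char *P) {
  P[0] = 0;
  P[1] = 0;
  P[2] = 0;
  P[3] = 0;
}

// ... are the kind of run the pass merges into a single call like this.
void zero4_memset(unsigned char *P) {
  memset(P, 0, 4);
}
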
@@ -662,7 +662,11 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
+ AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ // If necessary, perform additional analysis.
+ if (MR != AliasAnalysis::NoModRef)
+ MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
+ if (MR != AliasAnalysis::NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -676,7 +680,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (CS.getArgument(i)->getType() == cpyDest->getType())
CS.setArgument(i, cpyDest);
else
- CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
CS.getArgument(i)->getType(), cpyDest->getName(), C));
}
@@ -697,14 +701,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
/// copy from MDep's input if we can. MSize is the size of M's copy.
-///
+///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
-
+
// If dep instruction is reading from our current input, then it is a noop
// transfer and substituting the input won't change this instruction. Just
// ignore the input and let someone else zap MDep. This handles cases like:
@@ -712,14 +716,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
// memcpy(b <- a)
if (M->getSource() == MDep->getSource())
return false;
-
+
// Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
-
+
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// Verify that the copied-from memory doesn't change in between the two
@@ -739,23 +743,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
false, M, M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
-
+
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
UseMemMove = true;
-
+
// If all checks passed, then we can transform M.
-
+
// Make sure to use the lesser of the alignment of the source and the dest
// since we're changing where we're reading from, but don't want to increase
// the alignment past what can be read from or written to.
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-
+
IRBuilder<> Builder(M);
if (UseMemMove)
Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
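
The memcpy-memcpy dependence case above rewrites the second copy to read from the first copy's source when nothing clobbers it in between, falling back to memmove if the destinations may alias. The pattern in C form (illustrative only):

#include <string.h>

void forwardCopies(char *a, char *b, char *c, size_t n) {
  memcpy(b, a, n); // memcpy(b <- a)
  memcpy(c, b, n); // memcpy(c <- b): rewritten to memcpy(c <- a)
}
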
@@ -835,13 +839,13 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
if (!TLI->has(LibFunc::memmove))
return false;
-
+
// See if the pointers alias.
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
-
+
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
-
+
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
@@ -857,7 +861,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
++NumMoveToCpy;
return true;
}
-
+
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
if (TD == 0) return false;
@@ -880,7 +884,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
if (MDep == 0 || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
-
+
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
@@ -890,13 +894,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// then it is some target specific value that we can't know.
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
if (ByValAlign == 0) return false;
-
+
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
if (MDep->getAlignment() < ByValAlign &&
getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
return false;
-
+
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
// memcpy(a <- b)
@@ -911,16 +915,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
false, CS.getInstruction(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
-
+
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType())
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
"tmpcast", CS.getInstruction());
-
+
DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
<< " " << *MDep << "\n"
<< " " << *CS.getInstruction() << "\n");
-
+
// Otherwise we're good! Update the byval argument.
CS.setArgument(ArgNo, TmpCast);
++NumMemCpyInstr;
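
processByValArgument handles the analogous pattern for byval call arguments: when the temporary passed byval was itself filled by a memcpy that still holds at the call, the call can read straight from the original source. A rough C++ analogue of the pattern; the names are hypothetical and whether byval is actually used depends on the ABI.

struct Big { char Data[128]; };

void callee(Big ByValArg); // pass-by-value typically lowers to a byval copy

void caller(const Big &Src) {
  Big Tmp = Src; // lowers to a memcpy(Tmp <- Src)
  callee(Tmp);   // the byval copy can then be taken directly from Src
}
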
@@ -936,9 +940,9 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
-
+
bool RepeatInstruction = false;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
@@ -960,7 +964,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
}
}
}
-
+
return MadeChange;
}
@@ -972,19 +976,19 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
TLI = &getAnalysis<TargetLibraryInfo>();
-
+
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
return false;
-
+
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
-
+
MD = 0;
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 7e3e69b..3222f20 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -20,7 +20,7 @@
// This file also defines a simple ARC-aware AliasAnalysis.
//
// WARNING: This file knows about certain library functions. It recognizes them
-// by name, and hardwires knowedge of their semantics.
+// by name, and hardwires knowledge of their semantics.
//
// WARNING: This file knows about how certain Objective-C library functions are
// used. Naive LLVM IR transformations which would otherwise be
@@ -29,18 +29,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "objc-arc"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
// A handy option to enable/disable all optimizations in this file.
@@ -141,6 +131,13 @@ namespace {
// ARC Utilities.
//===----------------------------------------------------------------------===//
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/StringSwitch.h"
+
namespace {
/// InstructionClass - A simple classification for instructions.
enum InstructionClass {
@@ -299,22 +296,23 @@ static InstructionClass GetInstructionClass(const Value *V) {
// None of the intrinsic functions do objc_release. For intrinsics, the
// only question is whether or not they may be users.
switch (F->getIntrinsicID()) {
- case 0: break;
- case Intrinsic::bswap: case Intrinsic::ctpop:
- case Intrinsic::ctlz: case Intrinsic::cttz:
case Intrinsic::returnaddress: case Intrinsic::frameaddress:
case Intrinsic::stacksave: case Intrinsic::stackrestore:
case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ case Intrinsic::objectsize: case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: case Intrinsic::invariant_end:
// Don't let dbg info affect our results.
case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
// Short cut: Some intrinsics obviously don't use ObjC pointers.
return IC_None;
default:
- for (Function::const_arg_iterator AI = F->arg_begin(),
- AE = F->arg_end(); AI != AE; ++AI)
- if (IsPotentialUse(AI))
- return IC_User;
- return IC_None;
+ break;
}
}
return GetCallSiteClass(CI);
@@ -382,14 +380,14 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
}
-/// IsRetain - Test if the the given class is objc_retain or
+/// IsRetain - Test if the given class is objc_retain or
/// equivalent.
static bool IsRetain(InstructionClass Class) {
return Class == IC_Retain ||
Class == IC_RetainRV;
}
-/// IsAutorelease - Test if the the given class is objc_autorelease or
+/// IsAutorelease - Test if the given class is objc_autorelease or
/// equivalent.
static bool IsAutorelease(InstructionClass Class) {
return Class == IC_Autorelease ||
@@ -444,7 +442,7 @@ static bool IsNoThrow(InstructionClass Class) {
Class == IC_AutoreleasepoolPop;
}
-/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// EraseInstruction - Erase the given instruction. Many ObjC calls return their
/// argument verbatim, so if it's such a call and the return value has users,
/// replace them with the argument value.
static void EraseInstruction(Instruction *CI) {
@@ -565,9 +563,8 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return Arg;
}
- // If we found an identifiable object but it has multiple uses, but they
- // are trivial uses, we can still consider this to be a single-use
- // value.
+  // If we found an identifiable object that has multiple uses, but they are
+  // all trivial uses, we can still consider this to be a single-use value.
if (IsObjCIdentifiedObject(Arg)) {
for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
UI != UE; ++UI) {
@@ -692,7 +689,7 @@ namespace {
/// specified pass info.
virtual void *getAdjustedAnalysisPointer(const void *PI) {
if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
+ return static_cast<AliasAnalysis *>(this);
return this;
}
@@ -815,7 +812,7 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
- // Note that this doesn't include objc_retainBlock, becuase it updates
+ // Note that this doesn't include objc_retainBlock, because it updates
// pointers when it copies block data.
return NoModRef;
default:
@@ -915,6 +912,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
+#include "llvm/ADT/STLExtras.h"
namespace {
/// ObjCARCAPElim - Autorelease pool elimination.
@@ -922,8 +920,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnModule(Module &M);
- bool MayAutorelease(CallSite CS, unsigned Depth = 0);
- bool OptimizeBB(BasicBlock *BB);
+ static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
+ static bool OptimizeBB(BasicBlock *BB);
public:
static char ID;
@@ -949,15 +947,16 @@ void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
/// MayAutorelease - Interprocedurally determine if calls made by the
/// given call site can possibly produce autoreleases.
-bool ObjCARCAPElim::MayAutorelease(CallSite CS, unsigned Depth) {
- if (Function *Callee = CS.getCalledFunction()) {
+bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
+ if (const Function *Callee = CS.getCalledFunction()) {
if (Callee->isDeclaration() || Callee->mayBeOverridden())
return true;
- for (Function::iterator I = Callee->begin(), E = Callee->end();
+ for (Function::const_iterator I = Callee->begin(), E = Callee->end();
I != E; ++I) {
- BasicBlock *BB = I;
- for (BasicBlock::iterator J = BB->begin(), F = BB->end(); J != F; ++J)
- if (CallSite JCS = CallSite(J))
+ const BasicBlock *BB = I;
+ for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
+ J != F; ++J)
+ if (ImmutableCallSite JCS = ImmutableCallSite(J))
// This recursion depth limit is arbitrary. It's just great
// enough to cover known interesting testcases.
if (Depth < 3 &&
@@ -992,7 +991,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Push = 0;
break;
case IC_CallOrUser:
- if (MayAutorelease(CallSite(Inst)))
+ if (MayAutorelease(ImmutableCallSite(Inst)))
Push = 0;
break;
default:
@@ -1033,7 +1032,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
Value *Op = *OI;
// llvm.global_ctors is an array of pairs where the second members
// are constructor functions.
- Function *F = cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+ // it got bitcasted or whatever, look the other way.
+ if (!F)
+ continue;
// Only look at function definitions.
if (F->isDeclaration())
continue;
@@ -1089,14 +1092,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
// TODO: Delete release+retain pairs (rare).
-#include "llvm/GlobalAlias.h"
-#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1144,22 +1143,13 @@ bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
if (const SelectInst *SB = dyn_cast<SelectInst>(B))
- if (A->getCondition() == SB->getCondition()) {
- if (related(A->getTrueValue(), SB->getTrueValue()))
- return true;
- if (related(A->getFalseValue(), SB->getFalseValue()))
- return true;
- return false;
- }
+ if (A->getCondition() == SB->getCondition())
+ return related(A->getTrueValue(), SB->getTrueValue()) ||
+ related(A->getFalseValue(), SB->getFalseValue());
// Check both arms of the Select node individually.
- if (related(A->getTrueValue(), B))
- return true;
- if (related(A->getFalseValue(), B))
- return true;
-
- // The arms both checked out.
- return false;
+ return related(A->getTrueValue(), B) ||
+ related(A->getFalseValue(), B);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
@@ -1357,12 +1347,6 @@ namespace {
/// with the "tail" keyword.
bool IsTailCallRelease;
- /// Partial - True of we've seen an opportunity for partial RR elimination,
- /// such as pushing calls into a CFG triangle or into one side of a
- /// CFG diamond.
- /// TODO: Consider moving this to PtrState.
- bool Partial;
-
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
/// a clang.imprecise_release tag, this is the metadata tag.
MDNode *ReleaseMetadata;
@@ -1377,7 +1361,7 @@ namespace {
RRInfo() :
KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false), Partial(false),
+ IsTailCallRelease(false),
ReleaseMetadata(0) {}
void clear();
@@ -1388,7 +1372,6 @@ void RRInfo::clear() {
KnownSafe = false;
IsRetainBlock = false;
IsTailCallRelease = false;
- Partial = false;
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
@@ -1398,36 +1381,39 @@ namespace {
/// PtrState - This class summarizes several per-pointer runtime properties
/// which are propogated through the flow graph.
class PtrState {
- /// RefCount - The known minimum number of reference count increments.
- unsigned RefCount;
-
/// NestCount - The known minimum level of retain+release nesting.
unsigned NestCount;
+ /// KnownPositiveRefCount - True if the reference count is known to
+ /// be incremented.
+ bool KnownPositiveRefCount;
+
+    /// Partial - True if we've seen an opportunity for partial RR elimination,
+ /// such as pushing calls into a CFG triangle or into one side of a
+ /// CFG diamond.
+ bool Partial;
+
/// Seq - The current position in the sequence.
- Sequence Seq;
+ Sequence Seq : 8;
public:
/// RRI - Unidirectional information about the current sequence.
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : RefCount(0), NestCount(0), Seq(S_None) {}
-
- void SetAtLeastOneRefCount() {
- if (RefCount == 0) RefCount = 1;
- }
+ PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+ Seq(S_None) {}
- void IncrementRefCount() {
- if (RefCount != UINT_MAX) ++RefCount;
+ void SetKnownPositiveRefCount() {
+ KnownPositiveRefCount = true;
}
- void DecrementRefCount() {
- if (RefCount != 0) --RefCount;
+ void ClearRefCount() {
+ KnownPositiveRefCount = false;
}
bool IsKnownIncremented() const {
- return RefCount > 0;
+ return KnownPositiveRefCount;
}
void IncrementNestCount() {
@@ -1451,7 +1437,12 @@ namespace {
}
void ClearSequenceProgress() {
- Seq = S_None;
+ ResetSequenceProgress(S_None);
+ }
+
+ void ResetSequenceProgress(Sequence NewSeq) {
+ Seq = NewSeq;
+ Partial = false;
RRI.clear();
}
@@ -1462,7 +1453,7 @@ namespace {
void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
- RefCount = std::min(RefCount, Other.RefCount);
+ KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
@@ -1471,31 +1462,31 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
// If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
+ Partial = false;
RRI.clear();
- } else if (RRI.Partial || Other.RRI.Partial) {
+ } else if (Partial || Other.Partial) {
// If we're doing a merge on a path that's previously seen a partial
// merge, conservatively drop the sequence, to avoid doing partial
// RR elimination. If the branch predicates for the two merge differ,
// mixing them is unsafe.
- Seq = S_None;
- RRI.clear();
+ ClearSequenceProgress();
} else {
// Conservatively merge the ReleaseMetadata information.
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
RRI.ReleaseMetadata = 0;
RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
- RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+ Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
// Merge the insert point sets. If there are any differences,
// that makes this a partial merge.
- RRI.Partial = RRI.ReverseInsertPts.size() !=
- Other.RRI.ReverseInsertPts.size();
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
for (SmallPtrSet<Instruction *, 2>::const_iterator
I = Other.RRI.ReverseInsertPts.begin(),
E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
- RRI.Partial |= RRI.ReverseInsertPts.insert(*I);
+ Partial |= RRI.ReverseInsertPts.insert(*I);
}
}
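
The PtrState change above replaces the saturating RefCount counter with a KnownPositiveRefCount flag, so the merge point combines it with logical AND rather than std::min, and Partial now lives on PtrState itself. A minimal sketch of the new merge semantics, trimmed to the two relevant fields (not the full class):

#include <algorithm>

struct PtrStateSketch {
  bool KnownPositiveRefCount = false; // replaces the old unsigned RefCount
  unsigned NestCount = 0;

  void merge(const PtrStateSketch &Other) {
    // A fact is only "known" after the merge if it held on both paths.
    KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
    NestCount = std::min(NestCount, Other.NestCount);
  }
};
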
@@ -1521,6 +1512,11 @@ namespace {
/// known about a pointer at the top of each block.
MapTy PerPtrBottomUp;
+ /// Preds, Succs - Effective successors and predecessors of the current
+ /// block (this ignores ignorable edges and ignored backedges).
+ SmallVector<BasicBlock *, 2> Preds;
+ SmallVector<BasicBlock *, 2> Succs;
+
public:
BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
@@ -1578,14 +1574,22 @@ namespace {
/// entry to an exit which pass through this block. This is only valid
/// after both the top-down and bottom-up traversals are complete.
unsigned GetAllPathCount() const {
+ assert(TopDownPathCount != 0);
+ assert(BottomUpPathCount != 0);
return TopDownPathCount * BottomUpPathCount;
}
- /// IsVisitedTopDown - Test whether the block for this BBState has been
- /// visited by the top-down portion of the algorithm.
- bool isVisitedTopDown() const {
- return TopDownPathCount != 0;
- }
+ // Specialized CFG utilities.
+ typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
+ edge_iterator pred_begin() { return Preds.begin(); }
+ edge_iterator pred_end() { return Preds.end(); }
+ edge_iterator succ_begin() { return Succs.begin(); }
+ edge_iterator succ_end() { return Succs.end(); }
+
+ void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
+ void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
+
+ bool isExit() const { return Succs.empty(); }
};
}
@@ -1783,12 +1787,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
if (!RetainRVCallee) {
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- std::vector<Type *> Params;
- Params.push_back(I8X);
- FunctionType *FTy =
- FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
RetainRVCallee =
M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
Attributes);
@@ -1800,12 +1801,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
if (!AutoreleaseRVCallee) {
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- std::vector<Type *> Params;
- Params.push_back(I8X);
- FunctionType *FTy =
- FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
AutoreleaseRVCallee =
M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
Attributes);
@@ -1816,10 +1814,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
if (!ReleaseCallee) {
LLVMContext &C = M->getContext();
- std::vector<Type *> Params;
- Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
ReleaseCallee =
M->getOrInsertFunction(
"objc_release",
@@ -1832,10 +1828,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
Constant *ObjCARCOpt::getRetainCallee(Module *M) {
if (!RetainCallee) {
LLVMContext &C = M->getContext();
- std::vector<Type *> Params;
- Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
RetainCallee =
M->getOrInsertFunction(
"objc_retain",
@@ -1848,16 +1842,14 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
if (!RetainBlockCallee) {
LLVMContext &C = M->getContext();
- std::vector<Type *> Params;
- Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
- AttrListPtr Attributes;
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
// objc_retainBlock is not nounwind because it calls user copy constructors
// which could theoretically throw.
RetainBlockCallee =
M->getOrInsertFunction(
"objc_retainBlock",
FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- Attributes);
+ AttrListPtr());
}
return RetainBlockCallee;
}
@@ -1865,10 +1857,8 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
- std::vector<Type *> Params;
- Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
AutoreleaseCallee =
M->getOrInsertFunction(
"objc_autorelease",
@@ -2153,13 +2143,13 @@ static bool isNoopInstruction(const Instruction *I) {
/// objc_retainAutoreleasedReturnValue if the operand is a return value.
void
ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
- CallSite CS(GetObjCArg(Retain));
- Instruction *Call = CS.getInstruction();
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
if (!Call) return;
if (Call->getParent() != Retain->getParent()) return;
// Check that the call is next to the retain.
- BasicBlock::iterator I = Call;
+ BasicBlock::const_iterator I = Call;
++I;
while (isNoopInstruction(I)) ++I;
if (&*I != Retain)
@@ -2172,25 +2162,24 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
}
/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
-/// objc_retain if the operand is not a return value. Or, if it can be
-/// paired with an objc_autoreleaseReturnValue, delete the pair and
-/// return true.
+/// objc_retain if the operand is not a return value. Or, if it can be paired
+/// with an objc_autoreleaseReturnValue, delete the pair and return true.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for the argument being from an immediately preceding call or invoke.
- Value *Arg = GetObjCArg(RetainRV);
- CallSite CS(Arg);
- if (Instruction *Call = CS.getInstruction()) {
+ const Value *Arg = GetObjCArg(RetainRV);
+ ImmutableCallSite CS(Arg);
+ if (const Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
- BasicBlock::iterator I = Call;
+ BasicBlock::const_iterator I = Call;
++I;
while (isNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock *RetainRVParent = RetainRV->getParent();
if (II->getNormalDest() == RetainRVParent) {
- BasicBlock::iterator I = RetainRVParent->begin();
+ BasicBlock::const_iterator I = RetainRVParent->begin();
while (isNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
@@ -2418,7 +2407,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// These can always be moved up.
break;
case IC_Release:
- // These can't be moved across things that care about the retain count.
+ // These can't be moved across things that care about the retain
+ // count.
FindDependencies(NeedsPositiveRetainCount, Arg,
Inst->getParent(), Inst,
DependingInstructions, Visited, PA);
@@ -2500,13 +2490,14 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
for (; SI != SE; ++SI) {
Sequence SuccSSeq = S_None;
bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has visited this successor, take what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
- if (BBI != BBStates.end()) {
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- }
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
switch (SuccSSeq) {
case S_None:
case S_CanRelease: {
@@ -2553,13 +2544,14 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
for (; SI != SE; ++SI) {
Sequence SuccSSeq = S_None;
bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has visited this successor, take what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
- if (BBI != BBStates.end()) {
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- }
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
switch (SuccSSeq) {
case S_None: {
if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
@@ -2617,16 +2609,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
NestingDetected = true;
- S.RRI.clear();
-
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
- S.IncrementRefCount();
S.IncrementNestCount();
break;
}
@@ -2641,8 +2630,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.DecrementRefCount();
- S.SetAtLeastOneRefCount();
+ S.SetKnownPositiveRefCount();
S.DecrementNestCount();
switch (S.GetSeq()) {
@@ -2692,7 +2680,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
+ S.ClearRefCount();
switch (Seq) {
case S_Use:
S.SetSeq(S_CanRelease);
@@ -2759,37 +2747,20 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Merge the states from each successor to compute the initial state
// for the current block.
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- succ_const_iterator SI(TI), SE(TI, false);
- if (SI == SE)
- MyStates.SetAsExit();
- else {
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE;
-
- do {
- const BasicBlock *Succ = *SI++;
- if (Succ == BB)
- continue;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
- // If we haven't seen this node yet, then we've found a CFG cycle.
- // Be optimistic here; it's CheckForCFGHazards' job detect trouble.
- if (I == BBStates.end())
- continue;
- MyStates.InitFromSucc(I->second);
- while (SI != SE) {
- Succ = *SI++;
- if (Succ != BB) {
- I = BBStates.find(Succ);
- if (I != BBStates.end())
- MyStates.MergeSucc(I->second);
- }
- }
- break;
- } while (SI != SE);
+ for (BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end()); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.InitFromSucc(I->second);
+ ++SI;
+ for (; SI != SE; ++SI) {
+ Succ = *SI;
+ I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.MergeSucc(I->second);
+ }
+ break;
}
// Visit all the instructions, bottom-up.
@@ -2803,15 +2774,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
- // If there's a predecessor with an invoke, visit the invoke as
- // if it were part of this block, since we can't insert code after
- // an invoke in its own block, and we don't want to split critical
- // edges.
- for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) {
+ // If there's a predecessor with an invoke, visit the invoke as if it were
+ // part of this block, since we can't insert code after an invoke in its own
+ // block, and we don't want to split critical edges.
+ for (BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end()); PI != PE; ++PI) {
BasicBlock *Pred = *PI;
- TerminatorInst *PredTI = cast<TerminatorInst>(&Pred->back());
- if (isa<InvokeInst>(PredTI))
- NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+ NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
return NestingDetected;
@@ -2851,25 +2821,23 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
if (S.GetSeq() == S_Retain)
NestingDetected = true;
- S.SetSeq(S_Retain);
- S.RRI.clear();
+ S.ResetSequenceProgress(S_Retain);
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not
- // sufficient.
+ // Don't check S.IsKnownIncremented() here because it's not sufficient.
S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
}
- S.SetAtLeastOneRefCount();
- S.IncrementRefCount();
S.IncrementNestCount();
- return NestingDetected;
+
+    // A retain can be a potential use; proceed to the generic checking
+ // code below.
+ break;
}
case IC_Release: {
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementRefCount();
S.DecrementNestCount();
switch (S.GetSeq()) {
@@ -2916,7 +2884,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
+ S.ClearRefCount();
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
@@ -2967,41 +2935,21 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Merge the states from each predecessor to compute the initial state
// for the current block.
- const_pred_iterator PI(BB), PE(BB, false);
- if (PI == PE)
- MyStates.SetAsEntry();
- else
- do {
- unsigned OperandNo = PI.getOperandNo();
- const Use &Us = PI.getUse();
- ++PI;
-
- // Skip invoke unwind edges on invoke instructions marked with
- // clang.arc.no_objc_arc_exceptions.
- if (const InvokeInst *II = dyn_cast<InvokeInst>(Us.getUser()))
- if (OperandNo == II->getNumArgOperands() + 2 &&
- II->getMetadata(NoObjCARCExceptionsMDKind))
- continue;
-
- const BasicBlock *Pred = cast<TerminatorInst>(Us.getUser())->getParent();
- if (Pred == BB)
- continue;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- // If we haven't seen this node yet, then we've found a CFG cycle.
- // Be optimistic here; it's CheckForCFGHazards' job detect trouble.
- if (I == BBStates.end() || !I->second.isVisitedTopDown())
- continue;
- MyStates.InitFromPred(I->second);
- while (PI != PE) {
- Pred = *PI++;
- if (Pred != BB) {
- I = BBStates.find(Pred);
- if (I != BBStates.end() && I->second.isVisitedTopDown())
- MyStates.MergePred(I->second);
- }
- }
- break;
- } while (PI != PE);
+ for (BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end()); PI != PE; ++PI) {
+ const BasicBlock *Pred = *PI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.InitFromPred(I->second);
+ ++PI;
+ for (; PI != PE; ++PI) {
+ Pred = *PI;
+ I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.MergePred(I->second);
+ }
+ break;
+ }
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
@@ -3016,73 +2964,82 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
static void
ComputePostOrders(Function &F,
SmallVectorImpl<BasicBlock *> &PostOrder,
- SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
- /// Backedges - Backedges detected in the DFS. These edges will be
- /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
- /// traversed in the desired order.
- DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
-
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
+ unsigned NoObjCARCExceptionsMDKind,
+ DenseMap<const BasicBlock *, BBState> &BBStates) {
/// Visited - The visited set, for doing DFS walks.
SmallPtrSet<BasicBlock *, 16> Visited;
// Do DFS, computing the PostOrder.
SmallPtrSet<BasicBlock *, 16> OnStack;
SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+
+ // Functions always have exactly one entry block, and we don't have
+ // any other block that we treat like an entry block.
BasicBlock *EntryBB = &F.getEntryBlock();
- SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ BBState &MyStates = BBStates[EntryBB];
+ MyStates.SetAsEntry();
+ TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
Visited.insert(EntryBB);
OnStack.insert(EntryBB);
do {
dfs_next_succ:
- TerminatorInst *TI = cast<TerminatorInst>(&SuccStack.back().first->back());
- succ_iterator End = succ_iterator(TI, true);
- while (SuccStack.back().second != End) {
- BasicBlock *BB = *SuccStack.back().second++;
- if (Visited.insert(BB)) {
- SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
- OnStack.insert(BB);
+ BasicBlock *CurrBB = SuccStack.back().first;
+ TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
+ succ_iterator SE(TI, false);
+
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
+ while (SuccStack.back().second != SE) {
+ BasicBlock *SuccBB = *SuccStack.back().second++;
+ if (Visited.insert(SuccBB)) {
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
+ SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBState &SuccStates = BBStates[SuccBB];
+ SuccStates.addPred(CurrBB);
+ OnStack.insert(SuccBB);
goto dfs_next_succ;
}
- if (OnStack.count(BB))
- Backedges.insert(std::make_pair(SuccStack.back().first, BB));
+
+ if (!OnStack.count(SuccBB)) {
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBStates[SuccBB].addPred(CurrBB);
+ }
}
- OnStack.erase(SuccStack.back().first);
- PostOrder.push_back(SuccStack.pop_back_val().first);
+ OnStack.erase(CurrBB);
+ PostOrder.push_back(CurrBB);
+ SuccStack.pop_back();
} while (!SuccStack.empty());
Visited.clear();
- // Compute the exits, which are the starting points for reverse-CFG DFS.
- // This includes blocks where all the successors are backedges that
- // we're skipping.
- SmallVector<BasicBlock *, 4> Exits;
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+  // Functions may have many exits, and there are also blocks which we treat
+ // as exits due to ignored edges.
+ SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *BB = I;
- TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
- if (!Backedges.count(std::make_pair(BB, *SI)))
- goto HasNonBackedgeSucc;
- Exits.push_back(BB);
- HasNonBackedgeSucc:;
- }
+ BasicBlock *ExitBB = I;
+ BBState &MyStates = BBStates[ExitBB];
+ if (!MyStates.isExit())
+ continue;
- // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
- for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
- I != E; ++I) {
- BasicBlock *ExitBB = *I;
- PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ MyStates.SetAsExit();
+
+ PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
Visited.insert(ExitBB);
while (!PredStack.empty()) {
reverse_dfs_next_succ:
- pred_iterator End = pred_end(PredStack.back().first);
- while (PredStack.back().second != End) {
+ BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+ while (PredStack.back().second != PE) {
BasicBlock *BB = *PredStack.back().second++;
- // Skip backedges detected in the forward-CFG DFS.
- if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
- continue;
if (Visited.insert(BB)) {
- PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
goto reverse_dfs_next_succ;
}
}
@@ -3105,7 +3062,9 @@ ObjCARCOpt::Visit(Function &F,
// function exit point, and we want to ignore selected cycle edges.
SmallVector<BasicBlock *, 16> PostOrder;
SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
- ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+ NoObjCARCExceptionsMDKind,
+ BBStates);
// Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
@@ -3214,7 +3173,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
-
+
// A constant pointer can't be pointing to an object on the heap. It may
// be reference-counted, but it won't be deleted.
if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
@@ -3375,6 +3334,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
// Ok, everything checks out and we're all set. Let's move some code!
Changed = true;
+ assert(OldCount != 0 && "Unreachable code?");
AnyPairsCompletelyEliminated = NewCount == 0;
NumRRs += OldCount - NewCount;
MoveCalls(Arg, RetainsToMove, ReleasesToMove,
@@ -3515,7 +3475,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
for (Value::use_iterator UI = Alloca->use_begin(),
UE = Alloca->use_end(); UI != UE; ++UI) {
- Instruction *UserInst = cast<Instruction>(*UI);
+ const Instruction *UserInst = cast<Instruction>(*UI);
switch (GetBasicInstructionClass(UserInst)) {
case IC_InitWeak:
case IC_StoreWeak:
@@ -3529,8 +3489,18 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
for (Value::use_iterator UI = Alloca->use_begin(),
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
- if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ // These functions return their second argument.
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+ break;
+ case IC_DestroyWeak:
+ // No return value.
+ break;
+ default:
+ llvm_unreachable("alloca really is used!");
+ }
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
@@ -3598,8 +3568,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
if (!Autorelease)
goto next_block;
- InstructionClass AutoreleaseClass =
- GetBasicInstructionClass(Autorelease);
+ InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
if (!IsAutorelease(AutoreleaseClass))
goto next_block;
if (GetObjCArg(Autorelease) != Arg)
@@ -3690,7 +3659,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
- // calls finalizers.
+ // calls finalizers which can have arbitrary side effects.
// These are initialized lazily.
RetainRVCallee = 0;
@@ -3742,8 +3711,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
while (OptimizeSequences(F)) {}
// Optimizations if objc_autorelease is used.
- if (UsedInThisFunction &
- ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+ if (UsedInThisFunction & ((1 << IC_Autorelease) |
+ (1 << IC_AutoreleaseRV)))
OptimizeReturns(F);
return Changed;
@@ -3791,7 +3760,7 @@ namespace {
/// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
/// at the end of walking the function we have found no alloca
/// instructions, these calls can be marked "tail".
- DenseSet<CallInst *> StoreStrongCalls;
+ SmallPtrSet<CallInst *, 8> StoreStrongCalls;
Constant *getStoreStrongCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
@@ -3842,13 +3811,11 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
- std::vector<Type *> Params;
- Params.push_back(I8XX);
- Params.push_back(I8X);
+ Type *Params[] = { I8XX, I8X };
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
- Attributes.addAttr(1, Attribute::NoCapture);
+ AttrListPtr Attributes = AttrListPtr()
+ .addAttr(~0u, Attribute::NoUnwind)
+ .addAttr(1, Attribute::NoCapture);
StoreStrongCallee =
M->getOrInsertFunction(
@@ -3863,12 +3830,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
if (!RetainAutoreleaseCallee) {
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- std::vector<Type *> Params;
- Params.push_back(I8X);
- FunctionType *FTy =
- FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
RetainAutoreleaseCallee =
M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
}
@@ -3879,12 +3843,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
if (!RetainAutoreleaseRVCallee) {
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- std::vector<Type *> Params;
- Params.push_back(I8X);
- FunctionType *FTy =
- FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes;
- Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
RetainAutoreleaseRVCallee =
M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
Attributes);
@@ -3892,8 +3853,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
return RetainAutoreleaseRVCallee;
}
-/// ContractAutorelease - Merge an autorelease with a retain into a fused
-/// call.
+/// ContractAutorelease - Merge an autorelease with a retain into a fused call.
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
@@ -3954,18 +3914,41 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
BasicBlock *BB = Release->getParent();
if (Load->getParent() != BB) return;
- // Walk down to find the store.
+ // Walk down to find the store and the release, which may be in either order.
BasicBlock::iterator I = Load, End = BB->end();
++I;
AliasAnalysis::Location Loc = AA->getLocation(Load);
- while (I != End &&
- (&*I == Release ||
- IsRetain(GetBasicInstructionClass(I)) ||
- !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
- ++I;
- StoreInst *Store = dyn_cast<StoreInst>(I);
- if (!Store || !Store->isSimple()) return;
- if (Store->getPointerOperand() != Loc.Ptr) return;
+ StoreInst *Store = 0;
+ bool SawRelease = false;
+ for (; !Store || !SawRelease; ++I) {
+ if (I == End)
+ return;
+
+ Instruction *Inst = I;
+ if (Inst == Release) {
+ SawRelease = true;
+ continue;
+ }
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ // Unrelated retains are harmless.
+ if (IsRetain(Class))
+ continue;
+
+ if (Store) {
+ // The store is the point where we're going to put the objc_storeStrong,
+ // so make sure there are no uses after it.
+ if (CanUse(Inst, Load, PA, Class))
+ return;
+ } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+ // We are moving the load down to the store, so check for anything
+ // else which writes to the memory between the load and the store.
+ Store = dyn_cast<StoreInst>(Inst);
+ if (!Store || !Store->isSimple()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+ }
+ }
Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
@@ -4053,7 +4036,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// It seems that functions which "return twice" are also unsafe for the
// "tail" argument, because they are setjmp, which could need to
// return to an earlier stack state.
- bool TailOkForStoreStrongs = !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
+ bool TailOkForStoreStrongs = !F.isVarArg() &&
+ !F.callsFunctionThatReturnsTwice();
// For ObjC library calls which return their argument, replace uses of the
// argument with uses of the call return value, if it dominates the use. This
@@ -4083,8 +4067,22 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (!RetainRVMarker)
break;
BasicBlock::iterator BBI = Inst;
- --BBI;
- while (isNoopInstruction(BBI)) --BBI;
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RetainRV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (&*BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator();
+ break;
+ }
+ --BBI;
+ } while (isNoopInstruction(BBI));
+
if (&*BBI == GetObjCArg(Inst)) {
Changed = true;
InlineAsm *IA =
@@ -4094,6 +4092,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
/*Constraints=*/"", /*hasSideEffects=*/true);
CallInst::Create(IA, "", Inst);
}
+ decline_rv_optimization:
break;
}
case IC_InitWeak: {
@@ -4143,25 +4142,21 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// trivially dominate itself, which would lead us to rewriting its
// argument in terms of its return value, which would lead to
// infinite loops in GetObjCArg.
- if (DT->isReachableFromEntry(U) &&
- DT->dominates(Inst, U)) {
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
Changed = true;
Instruction *Replacement = Inst;
Type *UseTy = U.get()->getType();
if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
// For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo =
- PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB =
- PHI->getIncomingBlock(ValNo);
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB = PHI->getIncomingBlock(ValNo);
if (Replacement->getType() != UseTy)
Replacement = new BitCastInst(Replacement, UseTy, "",
&BB->back());
// While we're here, rewrite all edges for this PHI, rather
// than just one use at a time, to minimize the number of
// bitcasts we emit.
- for (unsigned i = 0, e = PHI->getNumIncomingValues();
- i != e; ++i)
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
if (PHI->getIncomingBlock(i) == BB) {
// Keep the UI iterator valid.
if (&PHI->getOperandUse(
@@ -4179,8 +4174,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
}
}
- // If Arg is a no-op casted pointer, strip one level of casts and
- // iterate.
+ // If Arg is a no-op casted pointer, strip one level of casts and iterate.
if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
Arg = BI->getOperand(0);
else if (isa<GEPOperator>(Arg) &&
@@ -4197,7 +4191,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// If this function has no escaping allocas or suspicious vararg usage,
// objc_storeStrong calls can be marked with the "tail" keyword.
if (TailOkForStoreStrongs)
- for (DenseSet<CallInst *>::iterator I = StoreStrongCalls.begin(),
+ for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
E = StoreStrongCalls.end(); I != E; ++I)
(*I)->setTailCall();
StoreStrongCalls.clear();
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 5de00d1..09687d8 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,21 +26,23 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;
-STATISTIC(NumLinear , "Number of insts linearized");
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
@@ -70,13 +72,51 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
}
}
#endif
-
+
+namespace {
+ /// \brief Utility class representing a base and exponent pair which form one
+ /// factor of some product.
+ struct Factor {
+ Value *Base;
+ unsigned Power;
+
+ Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
+
+ /// \brief Sort factors by their Base.
+ struct BaseSorter {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Base < RHS.Base;
+ }
+ };
+
+ /// \brief Compare factors for equal bases.
+ struct BaseEqual {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Base == RHS.Base;
+ }
+ };
+
+ /// \brief Sort factors in descending order by their power.
+ struct PowerDescendingSorter {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Power > RHS.Power;
+ }
+ };
+
+ /// \brief Compare factors for equal powers.
+ struct PowerEqual {
+ bool operator()(const Factor &LHS, const Factor &RHS) {
+ return LHS.Power == RHS.Power;
+ }
+ };
+ };
+}
+
namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
- SmallVector<WeakVH, 8> RedoInsts;
- SmallVector<WeakVH, 8> DeadInsts;
+ SetVector<AssertingVH<Instruction> > RedoInsts;
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
@@ -92,18 +132,19 @@ namespace {
private:
void BuildRankMap(Function &F);
unsigned getRank(Value *V);
- Value *ReassociateExpression(BinaryOperator *I);
- void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
- unsigned Idx = 0);
+ void ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
- void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
- void LinearizeExpr(BinaryOperator *I);
+ bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors);
+ Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl<Factor> &Factors);
+ Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
- void ReassociateInst(BasicBlock::iterator &BBI);
-
- void RemoveDeadBinaryOp(Value *V);
+ void EraseInst(Instruction *I);
+ void OptimizeInst(Instruction *I);
};
}
@@ -114,28 +155,24 @@ INITIALIZE_PASS(Reassociate, "reassociate",
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
-void Reassociate::RemoveDeadBinaryOp(Value *V) {
- Instruction *Op = dyn_cast<Instruction>(V);
- if (!Op || !isa<BinaryOperator>(Op))
- return;
-
- Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
-
- ValueRankMap.erase(Op);
- DeadInsts.push_back(Op);
- RemoveDeadBinaryOp(LHS);
- RemoveDeadBinaryOp(RHS);
+/// isReassociableOp - Return true if V is an instruction of the specified
+/// opcode and if it only has one use.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if (V->hasOneUse() && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
}
-
static bool isUnmovableInstruction(Instruction *I) {
if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::LandingPad ||
I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load ||
I->getOpcode() == Instruction::Invoke ||
(I->getOpcode() == Instruction::Call &&
!isa<DbgInfoIntrinsic>(I)) ||
- I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::UDiv ||
I->getOpcode() == Instruction::SDiv ||
I->getOpcode() == Instruction::FDiv ||
I->getOpcode() == Instruction::URem ||
@@ -198,211 +235,572 @@ unsigned Reassociate::getRank(Value *V) {
return ValueRankMap[I] = Rank;
}
-/// isReassociableOp - Return true if V is an instruction of the specified
-/// opcode and if it only has one use.
-static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
- if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
- cast<Instruction>(V)->getOpcode() == Opcode)
- return cast<BinaryOperator>(V);
- return 0;
-}
-
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
-static Instruction *LowerNegateToMultiply(Instruction *Neg,
- DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
+static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
- Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
- ValueRankMap.erase(Neg);
+ BinaryOperator *Res =
+ BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
- Neg->eraseFromParent();
return Res;
}
-// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
-// Note that if D is also part of the expression tree that we recurse to
-// linearize it as well. Besides that case, this does not recurse into A,B, or
-// C.
-void Reassociate::LinearizeExpr(BinaryOperator *I) {
- BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
- BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
- assert(isReassociableOp(LHS, I->getOpcode()) &&
- isReassociableOp(RHS, I->getOpcode()) &&
- "Not an expression that needs linearization?");
-
- DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
-
- // Move the RHS instruction to live immediately before I, avoiding breaking
- // dominator properties.
- RHS->moveBefore(I);
-
- // Move operands around to do the linearization.
- I->setOperand(1, RHS->getOperand(0));
- RHS->setOperand(0, LHS);
- I->setOperand(0, RHS);
-
- // Conservatively clear all the optional flags, which may not hold
- // after the reassociation.
- I->clearSubclassOptionalData();
- LHS->clearSubclassOptionalData();
- RHS->clearSubclassOptionalData();
-
- ++NumLinear;
- MadeChange = true;
- DEBUG(dbgs() << "Linearized: " << *I << '\n');
-
- // If D is part of this expression tree, tail recurse.
- if (isReassociableOp(I->getOperand(1), I->getOpcode()))
- LinearizeExpr(I);
+/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
+/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
+/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
+/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
+/// even x in Bitwidth-bit arithmetic.
+static unsigned CarmichaelShift(unsigned Bitwidth) {
+ if (Bitwidth < 3)
+ return Bitwidth - 1;
+ return Bitwidth - 2;
+}
+
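As a sanity check on the claim above, the following standalone snippet (purely illustrative, not part of the patch) verifies the Bitwidth = 8 case, where CarmichaelShift returns 6:

#include <cstdint>
#include <cstdio>

int main() {
  // For Bitwidth = 8 the shift is 6, so x^(2^6) == 1 (mod 2^8) for every odd x,
  // and x^(2^6) == 0 for every even x because 64 >= 8.
  for (unsigned X = 0; X != 256; ++X) {
    uint8_t P = 1;
    for (unsigned I = 0; I != 64; ++I)
      P = (uint8_t)(P * X);
    if ((X & 1) ? P != 1 : P != 0) {
      std::printf("counterexample: x = %u gives x^64 = %u (mod 256)\n", X, P);
      return 1;
    }
  }
  std::printf("x^64 (mod 256) is 1 for every odd x and 0 for every even x\n");
  return 0;
}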
+/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
+/// reducing the combined weight using any special properties of the operation.
+/// The existing weight LHS represents the computation X op X op ... op X where
+/// X occurs LHS times. The combined weight represents X op X op ... op X with
+/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
+/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
+/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
+static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
+ // If we were working with infinite precision arithmetic then the combined
+ // weight would be LHS + RHS. But we are using finite precision arithmetic,
+ // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
+ // for nilpotent operations and addition, but not for idempotent operations
+ // and multiplication), so it is important to correctly reduce the combined
+ // weight back into range if wrapping would be wrong.
+
+ // If RHS is zero then the weight didn't change.
+ if (RHS.isMinValue())
+ return;
+ // If LHS is zero then the combined weight is RHS.
+ if (LHS.isMinValue()) {
+ LHS = RHS;
+ return;
+ }
+ // From this point on we know that neither LHS nor RHS is zero.
+
+ if (Instruction::isIdempotent(Opcode)) {
+ // Idempotent means X op X === X, so any non-zero weight is equivalent to a
+ // weight of 1. Keeping weights at zero or one also means that wrapping is
+ // not a problem.
+ assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
+ return; // Return a weight of 1.
+ }
+ if (Instruction::isNilpotent(Opcode)) {
+ // Nilpotent means X op X === 0, so reduce weights modulo 2.
+ assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
+ LHS = 0; // 1 + 1 === 0 modulo 2.
+ return;
+ }
+ if (Opcode == Instruction::Add) {
+ // TODO: Reduce the weight by exploiting nsw/nuw?
+ LHS += RHS;
+ return;
+ }
+
+ assert(Opcode == Instruction::Mul && "Unknown associative operation!");
+ unsigned Bitwidth = LHS.getBitWidth();
+ // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
+ // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
+ // bit number x, since either x is odd in which case x^CM = 1, or x is even in
+ // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
+ // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
+ // which by a happy accident means that they can always be represented using
+ // Bitwidth bits.
+ // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
+ // the Carmichael number).
+ if (Bitwidth > 3) {
+ /// CM - The value of Carmichael's lambda function.
+ APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
+ // Any weight W >= Threshold can be replaced with W - CM.
+ APInt Threshold = CM + Bitwidth;
+ assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
+ // For Bitwidth 4 or more the following sum does not overflow.
+ LHS += RHS;
+ while (LHS.uge(Threshold))
+ LHS -= CM;
+ } else {
+ // To avoid problems with overflow do everything the same as above but using
+ // a larger type.
+ unsigned CM = 1U << CarmichaelShift(Bitwidth);
+ unsigned Threshold = CM + Bitwidth;
+ assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
+ "Weights not reduced!");
+ unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
+ while (Total >= Threshold)
+ Total -= CM;
+ LHS = Total;
+ }
}
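To make the Mul case concrete, here is a minimal sketch of the weight reduction using plain unsigned arithmetic in place of APInt; combineMulWeights is a hypothetical name used only for illustration:

#include <cstdio>

// Weights are kept in [0, CM + Bitwidth) where CM = 2^CarmichaelShift(Bitwidth),
// since x^W == x^(W - CM) holds for every Bitwidth-bit value x.
static unsigned combineMulWeights(unsigned LHS, unsigned RHS, unsigned Bitwidth) {
  unsigned CM = 1u << (Bitwidth < 3 ? Bitwidth - 1 : Bitwidth - 2);
  unsigned Threshold = CM + Bitwidth;
  unsigned Total = LHS + RHS;
  while (Total >= Threshold)
    Total -= CM;
  return Total;
}

int main() {
  // Bitwidth = 8: CM = 64 and Threshold = 72, so weights 40 and 50 combine to
  // 90 and reduce to 26, i.e. x^90 == x^26 for every 8-bit x.
  std::printf("%u\n", combineMulWeights(40, 50, 8)); // prints 26
  return 0;
}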
+/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C
+/// is repeated Weight times.
+static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C,
+ APInt Weight) {
+ // For addition the result can be efficiently computed as the product of the
+ // constant and the weight.
+ if (Opcode == Instruction::Add)
+ return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight));
+
+ // The weight might be huge, so compute by repeated squaring to ensure that
+ // compile time is proportional to the logarithm of the weight.
+ Constant *Result = 0;
+ Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc.
+ // Visit the bits in Weight.
+ while (Weight != 0) {
+ // If the current bit in Weight is non-zero do Result = Result op Power.
+ if (Weight[0])
+ Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power;
+ // Move on to the next bit if any more are non-zero.
+ Weight = Weight.lshr(1);
+ if (Weight.isMinValue())
+ break;
+ // Square the power.
+ Power = ConstantExpr::get(Opcode, Power, Power);
+ }
+
+ assert(Result && "Only positive weights supported!");
+ return Result;
+}
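The repeated-squaring loop above can be read in isolation; this sketch uses 64-bit multiplication in place of the ConstantExpr folding and an unsigned weight in place of the APInt (repeatedOp is an illustrative name, not from the patch):

#include <cstdint>
#include <cstdio>

static uint64_t repeatedOp(uint64_t C, unsigned Weight) {
  uint64_t Result = 0;
  bool HaveResult = false;
  uint64_t Power = C;            // Successively C, C*C, (C*C)*(C*C), ...
  while (Weight != 0) {
    if (Weight & 1) {            // Current bit set: fold this power into Result.
      Result = HaveResult ? Result * Power : Power;
      HaveResult = true;
    }
    Weight >>= 1;
    if (Weight == 0)             // No more bits; don't square needlessly.
      break;
    Power *= Power;
  }
  return Result;                 // Only positive weights are supported.
}

int main() {
  std::printf("%llu\n", (unsigned long long)repeatedOp(3, 5)); // 3^5 = 243
  return 0;
}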
-/// LinearizeExprTree - Given an associative binary expression tree, traverse
-/// all of the uses putting it into canonical form. This forces a left-linear
-/// form of the expression (((a+b)+c)+d), and collects information about the
-/// rank of the non-tree operands.
+typedef std::pair<Value*, APInt> RepeatedValue;
+
+/// LinearizeExprTree - Given an associative binary expression, return the leaf
+/// nodes in Ops along with their weights (how many times the leaf occurs). The
+/// original expression is the same as
+/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
+/// op
+/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
+/// op
+/// ...
+/// op
+/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
+///
+/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and
+/// they are all non-constant except possibly for the last one, which if it is
+/// constant will have weight one (Ops[N].second === 1).
+///
+/// This routine may modify the function, in which case it returns 'true'. The
+/// changes it makes may well be destructive, changing the value computed by 'I'
+/// to something completely different. Thus if the routine returns 'true' then
+/// you MUST either replace I with a new expression computed from the Ops array,
+/// or use RewriteExprTree to put the values back in.
+///
+/// A leaf node is either not a binary operation of the same kind as the root
+/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
+/// opcode), or is the same kind of binary operator but has a use which either
+/// does not belong to the expression, or does belong to the expression but is
+/// a leaf node. Every leaf node has at least one use that is a non-leaf node
+/// of the expression, while for non-leaf nodes (except for the root 'I') every
+/// use is a non-leaf node of the expression.
+///
+/// For example:
+///     expression graph        node names
+///
+///                     +        |        I
+///                    / \       |
+///                   +   +      |      A,  B
+///                  / \ / \     |
+///                 *   +   *    |     C,  D,  E
+///                / \ / \ / \   |
+///                   +   *      |      F,  G
+///
+/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
+/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
///
-/// NOTE: These intentionally destroys the expression tree operands (turning
-/// them into undef values) to reduce #uses of the values. This means that the
-/// caller MUST use something like RewriteExprTree to put the values back in.
+/// The expression is maximal: if some instruction is a binary operator of the
+/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
+/// then the instruction also belongs to the expression, is not a leaf node of
+/// it, and its operands also belong to the expression (but may be leaf nodes).
///
-void Reassociate::LinearizeExprTree(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops) {
- Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
+/// order to ensure that every non-root node in the expression has *exactly one*
+/// use by a non-leaf node of the expression. This destruction means that the
+/// caller MUST either replace 'I' with a new expression or use something like
+/// RewriteExprTree to put the values back in if the routine indicates that it
+/// made a change by returning 'true'.
+///
+/// In the above example either the right operand of A or the left operand of B
+/// will be replaced by undef. If it is B's operand then this gives:
+///
+///                     +        |        I
+///                    / \       |
+///                   +   +      |      A,  B - operand of B replaced with undef
+///                  / \   \     |
+///                 *   +   *    |     C,  D,  E
+///                / \ / \ / \   |
+///                   +   *      |      F,  G
+///
+/// Note that such undef operands can only be reached by passing through 'I'.
+/// For example, if you visit operands recursively starting from a leaf node
+/// then you will never see such an undef operand unless you get back to 'I',
+/// which requires passing through a phi node.
+///
+/// Note that this routine may also mutate binary operators of the wrong type
+/// that have all uses inside the expression (i.e. only used by non-leaf nodes
+/// of the expression) if it can turn them into binary operators of the right
+/// type and thus make the expression bigger.
+
+static bool LinearizeExprTree(BinaryOperator *I,
+ SmallVectorImpl<RepeatedValue> &Ops) {
+ DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
+ unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
+ assert(Instruction::isAssociative(Opcode) &&
+ Instruction::isCommutative(Opcode) &&
+ "Expected an associative and commutative operation!");
+ // If we see an absorbing element then the entire expression must be equal to
+ // it. For example, if this is a multiplication expression and zero occurs as
+ // an operand somewhere in it then the result of the expression must be zero.
+ Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, I->getType());
+
+ // Visit all operands of the expression, keeping track of their weight (the
+ // number of paths from the expression root to the operand, or if you like
+ // the number of times that operand occurs in the linearized expression).
+ // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
+ // while A has weight two.
+
+ // Worklist of non-leaf nodes (their operands are in the expression too) along
+ // with their weights, representing a certain number of paths to the operator.
+ // If an operator occurs in the worklist multiple times then we found multiple
+ // ways to get to it.
+ SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
+ Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
+ bool MadeChange = false;
+
+ // Leaves of the expression are values that either aren't the right kind of
+ // operation (eg: a constant, or a multiply in an add tree), or are, but have
+ // some uses that are not inside the expression. For example, in I = X + X,
+ // X = A + B, the value X has two uses (by I) that are in the expression. If
+ // X has any other uses, for example in a return instruction, then we consider
+ // X to be a leaf, and won't analyze it further. When we first visit a value,
+ // if it has more than one use then at first we conservatively consider it to
+ // be a leaf. Later, as the expression is explored, we may discover some more
+ // uses of the value from inside the expression. If all uses turn out to be
+ // from within the expression (and the value is a binary operator of the right
+ // kind) then the value is no longer considered to be a leaf, and its operands
+ // are explored.
+
+ // Leaves - Keeps track of the set of putative leaves as well as the number of
+ // paths to each leaf seen so far.
+ typedef DenseMap<Value*, APInt> LeafMap;
+ LeafMap Leaves; // Leaf -> Total weight so far.
+ SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
- // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
- BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
- BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+#ifndef NDEBUG
+ SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
+#endif
+ while (!Worklist.empty()) {
+ std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
+ I = P.first; // We examine the operands of this binary operator.
+
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
+ Value *Op = I->getOperand(OpIdx);
+ APInt Weight = P.second; // Number of paths to this operand.
+ DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
+ assert(!Op->use_empty() && "No uses, so how did we get to it?!");
+
+ // If the expression contains an absorbing element then there is no need
+ // to analyze it further: it must evaluate to the absorbing element.
+ if (Op == Absorber && !Weight.isMinValue()) {
+ Ops.push_back(std::make_pair(Absorber, APInt(Bitwidth, 1)));
+ return MadeChange;
+ }
- // If this is a multiply expression tree and it contains internal negations,
- // transform them into multiplies by -1 so they can be reassociated.
- if (I->getOpcode() == Instruction::Mul) {
- if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
- LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
- LHSBO = isReassociableOp(LHS, Opcode);
- }
- if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
- RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
- RHSBO = isReassociableOp(RHS, Opcode);
+ // If this is a binary operation of the right kind with only one use then
+ // add its operands to the expression.
+ if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
+ assert(Visited.insert(Op) && "Not first visit!");
+ DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
+ Worklist.push_back(std::make_pair(BO, Weight));
+ continue;
+ }
+
+ // Appears to be a leaf. Is the operand already in the set of leaves?
+ LeafMap::iterator It = Leaves.find(Op);
+ if (It == Leaves.end()) {
+ // Not in the leaf map. Must be the first time we saw this operand.
+ assert(Visited.insert(Op) && "Not first visit!");
+ if (!Op->hasOneUse()) {
+ // This value has uses not accounted for by the expression, so it is
+ // not safe to modify. Mark it as being a leaf.
+ DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
+ LeafOrder.push_back(Op);
+ Leaves[Op] = Weight;
+ continue;
+ }
+ // No uses outside the expression, try morphing it.
+ } else if (It != Leaves.end()) {
+ // Already in the leaf map.
+ assert(Visited.count(Op) && "In leaf map but not visited!");
+
+ // Update the number of paths to the leaf.
+ IncorporateWeight(It->second, Weight, Opcode);
+
+#if 0 // TODO: Re-enable once PR13021 is fixed.
+ // The leaf already has one use from inside the expression. As we want
+ // exactly one such use, drop this new use of the leaf.
+ assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
+ I->setOperand(OpIdx, UndefValue::get(I->getType()));
+ MadeChange = true;
+
+ // If the leaf is a binary operation of the right kind and we now see
+ // that its multiple original uses were in fact all by nodes belonging
+ // to the expression, then no longer consider it to be a leaf and add
+ // its operands to the expression.
+ if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
+ DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
+ Worklist.push_back(std::make_pair(BO, It->second));
+ Leaves.erase(It);
+ continue;
+ }
+#endif
+
+ // If we still have uses that are not accounted for by the expression
+ // then it is not safe to modify the value.
+ if (!Op->hasOneUse())
+ continue;
+
+ // No uses outside the expression, try morphing it.
+ Weight = It->second;
+ Leaves.erase(It); // Since the value may be morphed below.
+ }
+
+ // At this point we have a value which, first of all, is not a binary
+ // expression of the right kind, and secondly, is only used inside the
+ // expression. This means that it can safely be modified. See if we
+ // can usefully morph it into an expression of the right kind.
+ assert((!isa<Instruction>(Op) ||
+ cast<Instruction>(Op)->getOpcode() != Opcode) &&
+ "Should have been handled above!");
+ assert(Op->hasOneUse() && "Has uses outside the expression tree!");
+
+ // If this is a multiply expression, turn any internal negations into
+ // multiplies by -1 so they can be reassociated.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op);
+ if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) {
+ DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+ BO = LowerNegateToMultiply(BO);
+        DEBUG(dbgs() << *BO << '\n');
+ Worklist.push_back(std::make_pair(BO, Weight));
+ MadeChange = true;
+ continue;
+ }
+
+ // Failed to morph into an expression of the right type. This really is
+ // a leaf.
+ DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
+ assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
+ LeafOrder.push_back(Op);
+ Leaves[Op] = Weight;
}
}
- if (!LHSBO) {
- if (!RHSBO) {
- // Neither the LHS or RHS as part of the tree, thus this is a leaf. As
- // such, just remember these operands and their rank.
- Ops.push_back(ValueEntry(getRank(LHS), LHS));
- Ops.push_back(ValueEntry(getRank(RHS), RHS));
-
- // Clear the leaves out.
- I->setOperand(0, UndefValue::get(I->getType()));
- I->setOperand(1, UndefValue::get(I->getType()));
- return;
+ // The leaves, repeated according to their weights, represent the linearized
+ // form of the expression.
+ Constant *Cst = 0; // Accumulate constants here.
+ for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
+ Value *V = LeafOrder[i];
+ LeafMap::iterator It = Leaves.find(V);
+ if (It == Leaves.end())
+ // Node initially thought to be a leaf wasn't.
+ continue;
+ assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
+ APInt Weight = It->second;
+ if (Weight.isMinValue())
+ // Leaf already output or weight reduction eliminated it.
+ continue;
+ // Ensure the leaf is only output once.
+ It->second = 0;
+ // Glob all constants together into Cst.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = EvaluateRepeatedConstant(Opcode, C, Weight);
+ Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
+ continue;
}
-
- // Turn X+(Y+Z) -> (Y+Z)+X
- std::swap(LHSBO, RHSBO);
- std::swap(LHS, RHS);
- bool Success = !I->swapOperands();
- assert(Success && "swapOperands failed");
- (void)Success;
- MadeChange = true;
- } else if (RHSBO) {
- // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
- // part of the expression tree.
- LinearizeExpr(I);
- LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
- RHS = I->getOperand(1);
- RHSBO = 0;
+ // Add non-constant
+ Ops.push_back(std::make_pair(V, Weight));
}
- // Okay, now we know that the LHS is a nested expression and that the RHS is
- // not. Perform reassociation.
- assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
-
- // Move LHS right before I to make sure that the tree expression dominates all
- // values.
- LHSBO->moveBefore(I);
+ // Add any constants back into Ops, all globbed together and reduced to having
+ // weight 1 for the convenience of users.
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
+ if (Cst && Cst != Identity) {
+ // If combining multiple constants resulted in the absorber then the entire
+ // expression must evaluate to the absorber.
+ if (Cst == Absorber)
+ Ops.clear();
+ Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
+ }
- // Linearize the expression tree on the LHS.
- LinearizeExprTree(LHSBO, Ops);
+ // For nilpotent operations or addition there may be no operands, for example
+ // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
+ // in both cases the weight reduces to 0 causing the value to be skipped.
+ if (Ops.empty()) {
+ assert(Identity && "Associative operation without identity!");
+ Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
+ }
- // Remember the RHS operand and its rank.
- Ops.push_back(ValueEntry(getRank(RHS), RHS));
-
- // Clear the RHS leaf out.
- I->setOperand(1, UndefValue::get(I->getType()));
+ return MadeChange;
}
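The leaf/weight bookkeeping can be illustrated without LLVM at all: for I = X + A with X = A + B, the decomposition is (A, 2), (B, 1). The toy program below (hypothetical names, a hand-built tree in place of IR) reproduces that count:

#include <cstdio>
#include <map>
#include <string>

struct Node {
  const char *Name;        // Leaf name; null for an interior '+' node.
  const Node *L, *R;       // Children of an interior node.
};

// Count the number of root-to-leaf paths reaching each leaf, which is exactly
// the "weight" LinearizeExprTree records for that leaf.
static void countLeaves(const Node *N, unsigned Weight,
                        std::map<std::string, unsigned> &Leaves) {
  if (N->Name) {
    Leaves[N->Name] += Weight;
    return;
  }
  countLeaves(N->L, Weight, Leaves);
  countLeaves(N->R, Weight, Leaves);
}

int main() {
  Node A = { "A", 0, 0 }, B = { "B", 0, 0 };
  Node X = { 0, &A, &B };              // X = A + B
  Node I = { 0, &X, &A };              // I = X + A
  std::map<std::string, unsigned> Leaves;
  countLeaves(&I, 1, Leaves);
  for (std::map<std::string, unsigned>::const_iterator It = Leaves.begin(),
         E = Leaves.end(); It != E; ++It)
    std::printf("(%s, %u)\n", It->first.c_str(), It->second);   // (A, 2) (B, 1)
  return 0;
}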
// RewriteExprTree - Now that the operands for this expression tree are
-// linearized and optimized, emit them in-order. This function is written to be
-// tail recursive.
+// linearized and optimized, emit them in-order.
void Reassociate::RewriteExprTree(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops,
- unsigned i) {
- if (i+2 == Ops.size()) {
- if (I->getOperand(0) != Ops[i].Op ||
- I->getOperand(1) != Ops[i+1].Op) {
- Value *OldLHS = I->getOperand(0);
- DEBUG(dbgs() << "RA: " << *I << '\n');
- I->setOperand(0, Ops[i].Op);
- I->setOperand(1, Ops[i+1].Op);
-
- // Clear all the optional flags, which may not hold after the
- // reassociation if the expression involved more than just this operation.
- if (Ops.size() != 2)
- I->clearSubclassOptionalData();
-
- DEBUG(dbgs() << "TO: " << *I << '\n');
+ SmallVectorImpl<ValueEntry> &Ops) {
+ assert(Ops.size() > 1 && "Single values should be used directly!");
+
+ // Since our optimizations never increase the number of operations, the new
+ // expression can always be written by reusing the existing binary operators
+ // from the original expression tree, without creating any new instructions,
+ // though the rewritten expression may have a completely different topology.
+ // We take care to not change anything if the new expression will be the same
+ // as the original. If more than trivial changes (like commuting operands)
+ // were made then we are obliged to clear out any optional subclass data like
+ // nsw flags.
+
+ /// NodesToRewrite - Nodes from the original expression available for writing
+ /// the new expression into.
+ SmallVector<BinaryOperator*, 8> NodesToRewrite;
+ unsigned Opcode = I->getOpcode();
+ BinaryOperator *Op = I;
+
+ // ExpressionChanged - Non-null if the rewritten expression differs from the
+ // original in some non-trivial way, requiring the clearing of optional flags.
+ // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
+ BinaryOperator *ExpressionChanged = 0;
+ for (unsigned i = 0; ; ++i) {
+ // The last operation (which comes earliest in the IR) is special as both
+ // operands will come from Ops, rather than just one with the other being
+ // a subexpression.
+ if (i+2 == Ops.size()) {
+ Value *NewLHS = Ops[i].Op;
+ Value *NewRHS = Ops[i+1].Op;
+ Value *OldLHS = Op->getOperand(0);
+ Value *OldRHS = Op->getOperand(1);
+
+ if (NewLHS == OldLHS && NewRHS == OldRHS)
+ // Nothing changed, leave it alone.
+ break;
+
+ if (NewLHS == OldRHS && NewRHS == OldLHS) {
+ // The order of the operands was reversed. Swap them.
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ Op->swapOperands();
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ break;
+ }
+
+ // The new operation differs non-trivially from the original. Overwrite
+ // the old operands with the new ones.
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ if (NewLHS != OldLHS) {
+ if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(0, NewLHS);
+ }
+ if (NewRHS != OldRHS) {
+ if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(1, NewRHS);
+ }
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+
+ ExpressionChanged = Op;
+ MadeChange = true;
+ ++NumChanged;
+
+ break;
+ }
+
+ // Not the last operation. The left-hand side will be a sub-expression
+ // while the right-hand side will be the current element of Ops.
+ Value *NewRHS = Ops[i].Op;
+ if (NewRHS != Op->getOperand(1)) {
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ if (NewRHS == Op->getOperand(0)) {
+ // The new right-hand side was already present as the left operand. If
+ // we are lucky then swapping the operands will sort out both of them.
+ Op->swapOperands();
+ } else {
+ // Overwrite with the new right-hand side.
+ if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode))
+ NodesToRewrite.push_back(BO);
+ Op->setOperand(1, NewRHS);
+ ExpressionChanged = Op;
+ }
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
-
- // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)
- // delete the extra, now dead, nodes.
- RemoveDeadBinaryOp(OldLHS);
}
- return;
- }
- assert(i+2 < Ops.size() && "Ops index out of range!");
- if (I->getOperand(1) != Ops[i].Op) {
- DEBUG(dbgs() << "RA: " << *I << '\n');
- I->setOperand(1, Ops[i].Op);
+ // Now deal with the left-hand side. If this is already an operation node
+ // from the original expression then just rewrite the rest of the expression
+ // into it.
+ if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) {
+ Op = BO;
+ continue;
+ }
- // Conservatively clear all the optional flags, which may not hold
- // after the reassociation.
- I->clearSubclassOptionalData();
+ // Otherwise, grab a spare node from the original expression and use that as
+ // the left-hand side. If there are no nodes left then the optimizers made
+ // an expression with more nodes than the original! This usually means that
+ // they did something stupid but it might mean that the problem was just too
+    // hard (finding the minimal number of multiplications needed to realize a
+ // multiplication expression is NP-complete). Whatever the reason, smart or
+ // stupid, create a new node if there are none left.
+ BinaryOperator *NewOp;
+ if (NodesToRewrite.empty()) {
+ Constant *Undef = UndefValue::get(I->getType());
+ NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
+ Undef, Undef, "", I);
+ } else {
+ NewOp = NodesToRewrite.pop_back_val();
+ }
- DEBUG(dbgs() << "TO: " << *I << '\n');
+ DEBUG(dbgs() << "RA: " << *Op << '\n');
+ Op->setOperand(0, NewOp);
+ DEBUG(dbgs() << "TO: " << *Op << '\n');
+ ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
+ Op = NewOp;
}
-
- BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
- assert(LHS->getOpcode() == I->getOpcode() &&
- "Improper expression tree!");
-
- // Compactify the tree instructions together with each other to guarantee
- // that the expression tree is dominated by all of Ops.
- LHS->moveBefore(I);
- RewriteExprTree(LHS, Ops, i+1);
-}
-
+ // If the expression changed non-trivially then clear out all subclass data
+ // starting from the operator specified in ExpressionChanged, and compactify
+ // the operators to just before the expression root to guarantee that the
+ // expression tree is dominated by all of Ops.
+ if (ExpressionChanged)
+ do {
+ ExpressionChanged->clearSubclassOptionalData();
+ if (ExpressionChanged == I)
+ break;
+ ExpressionChanged->moveBefore(I);
+ ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
+ } while (1);
+
+ // Throw away any left over nodes from the original expression.
+ for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
+ RedoInsts.insert(NodesToRewrite[i]);
+}
-// NegateValue - Insert instructions before the instruction pointed to by BI,
-// that computes the negative version of the value specified. The negative
-// version of the value is returned, and BI is left pointing at the instruction
-// that should be processed next by the reassociation pass.
-//
+/// NegateValue - Insert instructions before the instruction pointed to by BI,
+/// that computes the negative version of the value specified. The negative
+/// version of the value is returned, and BI is left pointing at the instruction
+/// that should be processed next by the reassociation pass.
static Value *NegateValue(Value *V, Instruction *BI) {
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getNeg(C);
-
+
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -412,22 +810,21 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// the constants. We assume that instcombine will clean up the mess later if
// we introduce tons of unnecessary negation instructions.
//
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
- // Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
-
- // We must move the add instruction here, because the neg instructions do
- // not dominate the old add instruction in general. By moving it, we are
- // assured that the neg instructions we just inserted dominate the
- // instruction we are about to insert after them.
- //
- I->moveBefore(BI);
- I->setName(I->getName()+".neg");
- return I;
- }
-
+ if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
@@ -443,7 +840,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Verify that the negate is in this function, V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
-
+
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
@@ -471,7 +868,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
-
+
// Don't bother to break this up unless either the LHS is an associable add or
// subtract or if this is only used by one.
if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
@@ -480,19 +877,18 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
isReassociableOp(Sub->getOperand(1), Instruction::Sub))
return true;
- if (Sub->hasOneUse() &&
+ if (Sub->hasOneUse() &&
(isReassociableOp(Sub->use_back(), Instruction::Add) ||
isReassociableOp(Sub->use_back(), Instruction::Sub)))
return true;
-
+
return false;
}
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(Instruction *Sub,
- DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
+static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
@@ -500,15 +896,15 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
// and set it as the RHS of the add instruction we just made.
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
- Instruction *New =
+ BinaryOperator *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
+ Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
New->takeName(Sub);
// Everyone now refers to the add instruction.
- ValueRankMap.erase(Sub);
Sub->replaceAllUsesWith(New);
New->setDebugLoc(Sub->getDebugLoc());
- Sub->eraseFromParent();
DEBUG(dbgs() << "Negated: " << *New << '\n');
return New;
@@ -517,32 +913,23 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
-static Instruction *ConvertShiftToMul(Instruction *Shl,
- DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
- // If an operand of this shift is a reassociable multiply, or if the shift
- // is used by a reassociable multiply or add, turn into a multiply.
- if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
- (Shl->hasOneUse() &&
- (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
- isReassociableOp(Shl->use_back(), Instruction::Add)))) {
- Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
- MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
-
- Instruction *Mul =
- BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
- ValueRankMap.erase(Shl);
- Mul->takeName(Shl);
- Shl->replaceAllUsesWith(Mul);
- Mul->setDebugLoc(Shl->getDebugLoc());
- Shl->eraseFromParent();
- return Mul;
- }
- return 0;
+static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ BinaryOperator *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
+ return Mul;
}
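
ConvertShiftToMul now unconditionally builds the constant 1 << ShAmt via ConstantExpr::getShl and multiplies by it, so shl X, C becomes mul X, (1 << C); the caller decides whether the rewrite is profitable. A quick check of the arithmetic (illustrative only, using unsigned to keep the shift well defined):

#include <cassert>
#include <cstdint>
int main() {
  uint32_t X = 0x1234u;
  for (uint32_t C = 0; C < 8; ++C)
    assert((X << C) == X * (uint32_t(1) << C)); // shl X, C == mul X, 1<<C
  return 0;
}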
-// Scan backwards and forwards among values with the same rank as element i to
-// see if X exists. If X does not exist, return i. This is useful when
-// scanning for 'x' when we see '-x' because they both get the same rank.
+/// FindInOperandList - Scan backwards and forwards among values with the same
+/// rank as element i to see if X exists. If X does not exist, return i. This
+/// is useful when scanning for 'x' when we see '-x' because they both get the
+/// same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
Value *X) {
unsigned XRank = Ops[i].Rank;
@@ -562,22 +949,29 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakVH> &Ops){
if (Ops.size() == 1) return Ops.back();
-
+
Value *V1 = Ops.back();
Ops.pop_back();
Value *V2 = EmitAddTreeOfValues(I, Ops);
return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
}
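
EmitAddTreeOfValues rebuilds a chain of adds by recursion: peel the last operand, emit the tree for the rest, then add the peeled value back on. A sketch of the same recursion over plain ints (illustrative only; emitAddTree is a hypothetical stand-in, not the pass's routine):

#include <cassert>
#include <vector>

int emitAddTree(std::vector<int> &Ops) {
  if (Ops.size() == 1) return Ops.back();
  int V1 = Ops.back();
  Ops.pop_back();
  int V2 = emitAddTree(Ops);   // tree for the remaining operands
  return V2 + V1;
}

int main() {
  std::vector<int> Ops = {1, 2, 3, 4};
  assert(emitAddTree(Ops) == 10);
  return 0;
}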
-/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// RemoveFactorFromExpression - If V is an expression tree that is a
/// multiplication sequence, and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
if (!BO) return 0;
-
+
+ SmallVector<RepeatedValue, 8> Tree;
+ MadeChange |= LinearizeExprTree(BO, Tree);
SmallVector<ValueEntry, 8> Factors;
- LinearizeExprTree(BO, Factors);
+ Factors.reserve(Tree.size());
+ for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
+ RepeatedValue E = Tree[i];
+ Factors.append(E.second.getZExtValue(),
+ ValueEntry(getRank(E.first), E.first));
+ }
bool FoundFactor = false;
bool NeedsNegate = false;
@@ -587,7 +981,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
Factors.erase(Factors.begin()+i);
break;
}
-
+
// If this is a negative version of this factor, remove it.
if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
@@ -597,29 +991,28 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
break;
}
}
-
+
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
return 0;
}
-
+
BasicBlock::iterator InsertPt = BO; ++InsertPt;
-
+
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
if (Factors.size() == 1) {
- ValueRankMap.erase(BO);
- DeadInsts.push_back(BO);
+ RedoInsts.insert(BO);
V = Factors[0].Op;
} else {
RewriteExprTree(BO, Factors);
V = BO;
}
-
+
if (NeedsNegate)
V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
-
+
return V;
}
@@ -629,31 +1022,16 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
SmallVectorImpl<Value*> &Factors,
- const SmallVectorImpl<ValueEntry> &Ops,
- bool IsRoot) {
- BinaryOperator *BO;
- if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
- !(BO = dyn_cast<BinaryOperator>(V)) ||
- BO->getOpcode() != Instruction::Mul) {
+ const SmallVectorImpl<ValueEntry> &Ops) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) {
Factors.push_back(V);
return;
}
-
- // If this value has a single use because it is another input to the add
- // tree we're reassociating and we dropped its use, it actually has two
- // uses and we can't factor it.
- if (!IsRoot) {
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i].Op == V) {
- Factors.push_back(V);
- return;
- }
- }
-
-
+
// Otherwise, add the LHS and RHS to the list of factors.
- FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
- FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
}
/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
@@ -673,12 +1051,12 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
if (FoundX != i) {
if (Opcode == Instruction::And) // ...&X&~X = 0
return Constant::getNullValue(X->getType());
-
+
if (Opcode == Instruction::Or) // ...|X|~X = -1
return Constant::getAllOnesValue(X->getType());
}
}
-
+
// Next, check for duplicate pairs of values, which we assume are next to
// each other, due to our sorting criteria.
assert(i < Ops.size());
@@ -690,12 +1068,12 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
++NumAnnihil;
continue;
}
-
+
// Drop pairs of values for Xor.
assert(Opcode == Instruction::Xor);
if (e == 2)
return Constant::getNullValue(Ops[0].Op->getType());
-
+
// Y ^ X^X -> Y
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
i -= 1; e -= 2;
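
The annihilation rules used here are the usual bitwise identities: duplicates collapse for and/or, X together with ~X folds to 0 or all-ones, and pairs cancel for xor. A quick check of those identities (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>
int main() {
  uint32_t X = 0xDEADBEEFu, Y = 0x0F0F0F0Fu;
  assert((X & ~X) == 0);                 // ...&X&~X -> 0
  assert((X | ~X) == 0xFFFFFFFFu);       // ...|X|~X -> -1
  assert((Y ^ X ^ X) == Y);              // Y^X^X -> Y
  assert((X & X) == X && (X | X) == X);  // duplicate operands collapse
  return 0;
}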
@@ -728,46 +1106,46 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Ops.erase(Ops.begin()+i);
++NumFound;
} while (i != Ops.size() && Ops[i].Op == TheOp);
-
+
DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
++NumFactor;
-
+
// Insert a new multiply.
Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
-
+
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
- RedoInsts.push_back(Mul);
-
+ RedoInsts.insert(cast<Instruction>(Mul));
+
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
return Mul;
-
+
// Otherwise, we had some input that didn't have the dupe, such as
// "A + A + B" -> "A*2 + B". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
-
+
--i;
e = Ops.size();
continue;
}
-
+
// Check for X and -X in the operand list.
if (!BinaryOperator::isNeg(TheOp))
continue;
-
+
Value *X = BinaryOperator::getNegArgument(TheOp);
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
-
+
// Remove X and -X from the operand list.
if (Ops.size() == 2)
return Constant::getNullValue(X->getType());
-
+
Ops.erase(Ops.begin()+i);
if (i < FoundX)
--FoundX;
@@ -778,37 +1156,37 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
--i; // Revisit element.
e -= 2; // Removed two elements.
}
-
+
// Scan the operand list, checking to see if there are any common factors
// between operands. Consider something like A*A+A*B*C+D. We would like to
// reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
// To efficiently find this, we count the number of times a factor occurs
// for any ADD operands that are MULs.
DenseMap<Value*, unsigned> FactorOccurrences;
-
+
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
Value *MaxOccVal = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
- if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ if (!BOp)
continue;
-
+
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
- FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops);
assert(Factors.size() > 1 && "Bad linearize!");
-
+
// Add one to FactorOccurrences for each unique factor in this op.
SmallPtrSet<Value*, 8> Duplicates;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
Value *Factor = Factors[i];
if (!Duplicates.insert(Factor)) continue;
-
+
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
-
+
// If Factor is a negative constant, add the negated value as a factor
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
@@ -817,13 +1195,13 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
assert(!Duplicates.count(Factor) &&
"Shouldn't have two constant factors, missed a canonicalize");
-
+
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
}
}
}
-
+
// If any factor occurred more than one time, we can pull it out.
if (MaxOcc > 1) {
DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
@@ -831,16 +1209,16 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Create a new instruction that uses the MaxOccVal twice. If we don't do
// this, we could otherwise run into situations where removing a factor
- // from an expression will drop a use of maxocc, and this can cause
+ // from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
SmallVector<WeakVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
- BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
- if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ if (!BOp)
continue;
-
+
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
// The factorized operand may occur several times. Convert them all in
// one fell swoop.
@@ -854,7 +1232,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
--i;
}
}
-
+
// No need for extra uses anymore.
delete DummyInst;
@@ -866,26 +1244,201 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
(void)NumAddedValues;
- V = ReassociateExpression(cast<BinaryOperator>(V));
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ RedoInsts.insert(VI);
// Create the multiply.
- Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+ Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
// Rerun associate on the multiply in case the inner expression turned into
// a multiply. We want to make sure that we keep things in canonical form.
- V2 = ReassociateExpression(cast<BinaryOperator>(V2));
-
+ RedoInsts.insert(V2);
+
// If every add operand included the factor (e.g. "A*B + A*C"), then the
// entire result expression is just the multiply "A*(B+C)".
if (Ops.empty())
return V2;
-
+
// Otherwise, we had some input that didn't have the factor, such as
// "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
-
+
+ return 0;
+}
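
The two rewrites OptimizeAdd performs are duplicate-addend folding and common-factor extraction, e.g. A+A+B -> A*2+B and A*A+A*B*C+D -> A*(A+B*C)+D. A worked check on sample integers (illustrative only, not part of the patch):

#include <cassert>
int main() {
  int A = 5, B = 7, C = 3, D = 11;
  assert(A + A + B == A * 2 + B);               // duplicate addends become a multiply
  assert(A*A + A*B*C + D == A * (A + B*C) + D); // common factor pulled out of the add
  return 0;
}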
+
+namespace {
+ /// \brief Predicate tests whether a ValueEntry's op is in a map.
+ struct IsValueInMap {
+ const DenseMap<Value *, unsigned> &Map;
+
+ IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}
+
+ bool operator()(const ValueEntry &Entry) {
+ return Map.find(Entry.Op) != Map.end();
+ }
+ };
+}
+
+/// \brief Build up a vector of value/power pairs factoring a product.
+///
+/// Given a series of multiplication operands, build a vector of factors and
+/// the powers each is raised to when forming the final product. Sort them in
+/// the order of descending power.
+///
+/// (x*x) -> [(x, 2)]
+/// ((x*x)*x) -> [(x, 3)]
+/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
+///
+/// \returns Whether any factors have a power greater than one.
+bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors) {
+ // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
+ // Compute the sum of powers of simplifiable factors.
+ unsigned FactorPowerSum = 0;
+ for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ // Track for simplification all factors which occur 2 or more times.
+ if (Count > 1)
+ FactorPowerSum += Count;
+ }
+
+ // We can only simplify factors if the sum of the powers of our simplifiable
+ // factors is 4 or higher. When that is the case, we will *always* have
+  // a simplification. This is an important invariant to prevent cyclically
+ // trying to simplify already minimal formations.
+ if (FactorPowerSum < 4)
+ return false;
+
+ // Now gather the simplifiable factors, removing them from Ops.
+ FactorPowerSum = 0;
+ for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ if (Count == 1)
+ continue;
+ // Move an even number of occurrences to Factors.
+ Count &= ~1U;
+ Idx -= Count;
+ FactorPowerSum += Count;
+ Factors.push_back(Factor(Op, Count));
+ Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
+ }
+
+ // None of the adjustments above should have reduced the sum of factor powers
+  // below our minimum of '4'.
+ assert(FactorPowerSum >= 4);
+
+ std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ return true;
+}
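
A standalone sketch of the same bookkeeping on a plain vector of ints (illustrative only; the names are hypothetical, and the real routine additionally removes the moved factors from the sorted ValueEntry list, which this sketch skips): count repeats, bail out unless the repeated powers sum to at least 4, keep only the even part of each power, and sort by descending power.

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

std::vector<std::pair<int, unsigned>> collectFactors(const std::vector<int> &Ops) {
  std::map<int, unsigned> Count;
  for (int V : Ops) ++Count[V];

  unsigned FactorPowerSum = 0;
  for (const auto &KV : Count)
    if (KV.second > 1)
      FactorPowerSum += KV.second;
  if (FactorPowerSum < 4)                 // too small to ever save a multiply
    return {};

  std::vector<std::pair<int, unsigned>> Factors;
  for (const auto &KV : Count)
    if (KV.second > 1)
      Factors.push_back({KV.first, KV.second & ~1u}); // even part of the power
  std::sort(Factors.begin(), Factors.end(),
            [](const auto &A, const auto &B) { return A.second > B.second; });
  return Factors;
}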
+
+/// \brief Build a tree of multiplies, computing the product of Ops.
+static Value *buildMultiplyTree(IRBuilder<> &Builder,
+ SmallVectorImpl<Value*> &Ops) {
+ if (Ops.size() == 1)
+ return Ops.back();
+
+ Value *LHS = Ops.pop_back_val();
+ do {
+ LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
+ } while (!Ops.empty());
+
+ return LHS;
+}
+
+/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
+///
+/// Given a vector of values raised to various powers, where no two values are
+/// equal and the powers are sorted in decreasing order, compute the minimal
+/// DAG of multiplies to compute the final product, and return that product
+/// value.
+Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl<Factor> &Factors) {
+ assert(Factors[0].Power);
+ SmallVector<Value *, 4> OuterProduct;
+ for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
+ Idx < Size && Factors[Idx].Power > 0; ++Idx) {
+ if (Factors[Idx].Power != Factors[LastIdx].Power) {
+ LastIdx = Idx;
+ continue;
+ }
+
+ // We want to multiply across all the factors with the same power so that
+ // we can raise them to that power as a single entity. Build a mini tree
+ // for that.
+ SmallVector<Value *, 4> InnerProduct;
+ InnerProduct.push_back(Factors[LastIdx].Base);
+ do {
+ InnerProduct.push_back(Factors[Idx].Base);
+ ++Idx;
+ } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
+
+ // Reset the base value of the first factor to the new expression tree.
+ // We'll remove all the factors with the same power in a second pass.
+ Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
+ if (Instruction *MI = dyn_cast<Instruction>(M))
+ RedoInsts.insert(MI);
+
+ LastIdx = Idx;
+ }
+ // Unique factors with equal powers -- we've folded them into the first one's
+ // base.
+ Factors.erase(std::unique(Factors.begin(), Factors.end(),
+ Factor::PowerEqual()),
+ Factors.end());
+
+  // Iteratively collect the base of each factor with an odd power into the
+ // outer product, and halve each power in preparation for squaring the
+ // expression.
+ for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
+ if (Factors[Idx].Power & 1)
+ OuterProduct.push_back(Factors[Idx].Base);
+ Factors[Idx].Power >>= 1;
+ }
+ if (Factors[0].Power) {
+ Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
+ OuterProduct.push_back(SquareRoot);
+ OuterProduct.push_back(SquareRoot);
+ }
+ if (OuterProduct.size() == 1)
+ return OuterProduct.front();
+
+ Value *V = buildMultiplyTree(Builder, OuterProduct);
+ return V;
+}
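
The halving loop is the classic square-and-multiply scheme: factors with an odd power contribute one copy to the outer product, every power is halved, and the remaining product is squared. A single-base sketch of that idea (illustrative only; the real routine also shares sub-products across different bases, which this does not attempt):

#include <cassert>
#include <cstdint>

uint64_t powBySquaring(uint64_t Base, unsigned Power) {
  uint64_t Result = 1;
  while (Power) {
    if (Power & 1)   // odd power: one copy goes to the outer product
      Result *= Base;
    Base *= Base;    // square the base, halving the remaining power
    Power >>= 1;
  }
  return Result;
}

int main() {
  assert(powBySquaring(3, 5) == 243);
  assert(powBySquaring(2, 10) == 1024);
  return 0;
}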
+
+Value *Reassociate::OptimizeMul(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // We can only optimize the multiplies when there is a chain of more than
+ // three, such that a balanced tree might require fewer total multiplies.
+ if (Ops.size() < 4)
+ return 0;
+
+ // Try to turn linear trees of multiplies without other uses of the
+ // intermediate stages into minimal multiply DAGs with perfect sub-expression
+ // re-use.
+ SmallVector<Factor, 4> Factors;
+ if (!collectMultiplyFactors(Ops, Factors))
+ return 0; // All distinct factors, so nothing left for us to do.
+
+ IRBuilder<> Builder(I);
+ Value *V = buildMinimalMultiplyDAG(Builder, Factors);
+ if (Ops.empty())
+ return V;
+
+ ValueEntry NewEntry = ValueEntry(getRank(V), V);
+ Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
return 0;
}
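
The Ops.size() < 4 cutoff matches the arithmetic: with three multiplicands a balanced tree still needs two multiplies, but from four operands onward a shared squared sub-product starts saving work, e.g. x*x*x*x drops from three multiplies to two. A quick check (illustrative only):

#include <cassert>
int main() {
  int x = 7;
  int t = x * x;                  // one shared multiply
  assert(t * t == x * x * x * x); // three multiplies reduced to two
  return 0;
}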
@@ -893,95 +1446,105 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- bool IterateOptimization = false;
if (Ops.size() == 1) return Ops[0].Op;
unsigned Opcode = I->getOpcode();
-
- if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
- if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
- Ops.pop_back();
- Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
- return OptimizeExpression(I, Ops);
- }
-
- // Check for destructive annihilation due to a constant being used.
- if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
- switch (Opcode) {
- default: break;
- case Instruction::And:
- if (CstVal->isZero()) // X & 0 -> 0
- return CstVal;
- if (CstVal->isAllOnesValue()) // X & -1 -> X
- Ops.pop_back();
- break;
- case Instruction::Mul:
- if (CstVal->isZero()) { // X * 0 -> 0
- ++NumAnnihil;
- return CstVal;
- }
-
- if (cast<ConstantInt>(CstVal)->isOne())
- Ops.pop_back(); // X * 1 -> X
- break;
- case Instruction::Or:
- if (CstVal->isAllOnesValue()) // X | -1 -> -1
- return CstVal;
- // FALLTHROUGH!
- case Instruction::Add:
- case Instruction::Xor:
- if (CstVal->isZero()) // X [|^+] 0 -> X
- Ops.pop_back();
- break;
- }
- if (Ops.size() == 1) return Ops[0].Op;
// Handle destructive annihilation due to identities between elements in the
// argument list here.
+ unsigned NumOps = Ops.size();
switch (Opcode) {
default: break;
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor: {
- unsigned NumOps = Ops.size();
+ case Instruction::Xor:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
- IterateOptimization |= Ops.size() != NumOps;
break;
- }
- case Instruction::Add: {
- unsigned NumOps = Ops.size();
+ case Instruction::Add:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
- IterateOptimization |= Ops.size() != NumOps;
- }
+ break;
+ case Instruction::Mul:
+ if (Value *Result = OptimizeMul(I, Ops))
+ return Result;
break;
- //case Instruction::Mul:
}
- if (IterateOptimization)
+ if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
return 0;
}
+/// EraseInst - Zap the given instruction, adding interesting operands to the
+/// work list.
+void Reassociate::EraseInst(Instruction *I) {
+ assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ // Erase the dead instruction.
+ ValueRankMap.erase(I);
+ RedoInsts.remove(I);
+ I->eraseFromParent();
+ // Optimize its operands.
+ SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
+ // If this is a node in an expression tree, climb to the expression root
+ // and add that since that's where optimization actually happens.
+ unsigned Opcode = Op->getOpcode();
+ while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
+ Visited.insert(Op))
+ Op = Op->use_back();
+ RedoInsts.insert(Op);
+ }
+}
+
+/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
+/// instructions is not allowed.
+void Reassociate::OptimizeInst(Instruction *I) {
+ // Only consider operations that we understand.
+ if (!isa<BinaryOperator>(I))
+ return;
-/// ReassociateInst - Inspect and reassociate the instruction at the
-/// given position, post-incrementing the position.
-void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
- Instruction *BI = BBI++;
- if (BI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ if (I->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(I->getOperand(1)))
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn into a multiply.
+ if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
+ (I->hasOneUse() &&
+ (isReassociableOp(I->use_back(), Instruction::Mul) ||
+ isReassociableOp(I->use_back(), Instruction::Add)))) {
+ Instruction *NI = ConvertShiftToMul(I);
+ RedoInsts.insert(I);
MadeChange = true;
- BI = NI;
+ I = NI;
+ }
+
+ // Floating point binary operators are not associative, but we can still
+  // commute some of them, to canonicalize the order of their operands.
+ // This can potentially expose more CSE opportunities, and makes writing
+ // other transformations simpler.
+ if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) {
+ // FAdd and FMul can be commuted.
+ if (I->getOpcode() != Instruction::FMul &&
+ I->getOpcode() != Instruction::FAdd)
+ return;
+
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ unsigned LHSRank = getRank(LHS);
+ unsigned RHSRank = getRank(RHS);
+
+ // Sort the operands by rank.
+ if (RHSRank < LHSRank) {
+ I->setOperand(0, RHS);
+ I->setOperand(1, LHS);
}
- // Reject cases where it is pointless to do this.
- if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
- BI->getType()->isVectorTy())
- return; // Floating point ops are not associative.
+ return;
+ }
// Do not reassociate boolean (i1) expressions. We want to preserve the
// original order of evaluation for short-circuited comparisons that
@@ -989,58 +1552,66 @@ void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
// is not further optimized, it is likely to be transformed back to a
// short-circuited form for code gen, and the source order may have been
// optimized for the most likely conditions.
- if (BI->getType()->isIntegerTy(1))
+ if (I->getType()->isIntegerTy(1))
return;
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
- if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(BI)) {
- BI = BreakUpSubtract(BI, ValueRankMap);
- // Reset the BBI iterator in case BreakUpSubtract changed the
- // instruction it points to.
- BBI = BI;
- ++BBI;
+ if (I->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(I)) {
+ Instruction *NI = BreakUpSubtract(I);
+ RedoInsts.insert(I);
MadeChange = true;
- } else if (BinaryOperator::isNeg(BI)) {
+ I = NI;
+ } else if (BinaryOperator::isNeg(I)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
- if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
- (!BI->hasOneUse() ||
- !isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap);
+ if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
+ (!I->hasOneUse() ||
+ !isReassociableOp(I->use_back(), Instruction::Mul))) {
+ Instruction *NI = LowerNegateToMultiply(I);
+ RedoInsts.insert(I);
MadeChange = true;
+ I = NI;
}
}
}
- // If this instruction is a commutative binary operator, process it.
- if (!BI->isAssociative()) return;
- BinaryOperator *I = cast<BinaryOperator>(BI);
+ // If this instruction is an associative binary operator, process it.
+ if (!I->isAssociative()) return;
+ BinaryOperator *BO = cast<BinaryOperator>(I);
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
- if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ unsigned Opcode = BO->getOpcode();
+ if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
return;
- // If this is an add tree that is used by a sub instruction, ignore it
+ // If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
- if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
- cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
+ cast<Instruction>(BO->use_back())->getOpcode() == Instruction::Sub)
return;
- ReassociateExpression(I);
+ ReassociateExpression(BO);
}
-Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
-
+void Reassociate::ReassociateExpression(BinaryOperator *I) {
+
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
+ SmallVector<RepeatedValue, 8> Tree;
+ MadeChange |= LinearizeExprTree(I, Tree);
SmallVector<ValueEntry, 8> Ops;
- LinearizeExprTree(I, Ops);
-
+ Ops.reserve(Tree.size());
+ for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
+ RepeatedValue E = Tree[i];
+ Ops.append(E.second.getZExtValue(),
+ ValueEntry(getRank(E.first), E.first));
+ }
+
DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
-
+
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
// stable_sort so that values with equal ranks will have their relative
@@ -1048,21 +1619,24 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// this sorts so that the highest ranking values end up at the beginning of
// the vector.
std::stable_sort(Ops.begin(), Ops.end());
-
+
// OptimizeExpression - Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
+ if (V == I)
+ // Self-referential expression in unreachable code.
+ return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
if (Instruction *VI = dyn_cast<Instruction>(V))
VI->setDebugLoc(I->getDebugLoc());
- RemoveDeadBinaryOp(I);
+ RedoInsts.insert(I);
++NumAnnihil;
- return V;
+ return;
}
-
+
// We want to sink immediates as deeply as possible except in the case where
// this is a multiply tree used only by an add, and the immediate is a -1.
// In this case we reassociate to put the negation on the outside so that we
@@ -1074,51 +1648,57 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
}
-
+
DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
-
+
if (Ops.size() == 1) {
+ if (Ops[0].Op == I)
+ // Self-referential expression in unreachable code.
+ return;
+
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
OI->setDebugLoc(I->getDebugLoc());
- RemoveDeadBinaryOp(I);
- return Ops[0].Op;
+ RedoInsts.insert(I);
+ return;
}
-
+
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
- return I;
}
-
bool Reassociate::runOnFunction(Function &F) {
- // Recalculate the rank map for F
+ // Calculate the rank map for F
BuildRankMap(F);
MadeChange = false;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
- ReassociateInst(BBI);
-
- // Now that we're done, revisit any instructions which are likely to
- // have secondary reassociation opportunities.
- while (!RedoInsts.empty())
- if (Value *V = RedoInsts.pop_back_val()) {
- BasicBlock::iterator BBI = cast<Instruction>(V);
- ReassociateInst(BBI);
- }
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Optimize every instruction in the basic block.
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+ if (isInstructionTriviallyDead(II)) {
+ EraseInst(II++);
+ } else {
+ OptimizeInst(II);
+ assert(II->getParent() == BI && "Moved to a different block!");
+ ++II;
+ }
- // Now that we're done, delete any instructions which are no longer used.
- while (!DeadInsts.empty())
- if (Value *V = DeadInsts.pop_back_val())
- RecursivelyDeleteTriviallyDeadInstructions(V);
+ // If this produced extra instructions to optimize, handle them now.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
+ }
+ }
// We are done with the rank map.
RankMap.clear();
ValueRankMap.clear();
+
return MadeChange;
}
-
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 47afc77..ea1de63 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file demotes all registers to memory references. It is intented to be
+// This file demotes all registers to memory references. It is intended to be
// the inverse of PromoteMemoryToRegister. By converting to loads, the only
// values live across basic blocks are allocas and loads before phi nodes.
// It is intended that this should make CFG hacking much easier.
@@ -59,7 +59,7 @@ namespace {
virtual bool runOnFunction(Function &F);
};
}
-
+
char RegToMem::ID = 0;
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
false, false)
@@ -68,25 +68,25 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
false, false)
bool RegToMem::runOnFunction(Function &F) {
- if (F.isDeclaration())
+ if (F.isDeclaration())
return false;
-
+
// Insert all new allocas into entry block.
BasicBlock *BBEntry = &F.getEntryBlock();
assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
"Entry block to function must not have predecessors!");
-
+
// Find first non-alloca instruction and create insertion point. This is
  // safe if the block is well-formed: it always has a terminator, otherwise
  // we'll get an assertion.
BasicBlock::iterator I = BBEntry->begin();
while (isa<AllocaInst>(I)) ++I;
-
+
CastInst *AllocaInsertionPoint =
new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
Type::getInt32Ty(F.getContext()),
"reg2mem alloca point", I);
-
+
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
std::list<Instruction*> WorkList;
@@ -99,15 +99,15 @@ bool RegToMem::runOnFunction(Function &F) {
WorkList.push_front(&*iib);
}
}
-
+
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-
+
WorkList.clear();
-
+
// Find all phi's
for (Function::iterator ibb = F.begin(), ibe = F.end();
ibb != ibe; ++ibb)
@@ -115,19 +115,18 @@ bool RegToMem::runOnFunction(Function &F) {
iib != iie; ++iib)
if (isa<PHINode>(iib))
WorkList.push_front(&*iib);
-
+
// Demote phi nodes
NumPhisDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-
+
return true;
}
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
-//
char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 16b64a5..2c39aab 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -409,7 +409,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
-
+
if (Elt == 0)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 7d65bcc..48318c8 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
// implements several scalar transformations over the LLVM intermediate
// representation, including the C bindings for that library.
//
@@ -24,7 +24,7 @@
using namespace llvm;
-/// initializeScalarOptsPasses - Initialize all passes linked into the
+/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 026fea1..6637126 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -22,33 +22,34 @@
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
@@ -59,12 +60,25 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
- SROA(int T, bool hasDT, char &ID)
+ SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
: FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
SRThreshold = 128;
else
SRThreshold = T;
+ if (ST == -1)
+ StructMemberThreshold = 32;
+ else
+ StructMemberThreshold = ST;
+ if (AT == -1)
+ ArrayElementThreshold = 8;
+ else
+ ArrayElementThreshold = AT;
+ if (SLT == -1)
+ // Do not limit the scalar integer load size if no threshold is given.
+ ScalarLoadThreshold = -1;
+ else
+ ScalarLoadThreshold = SLT;
}
bool runOnFunction(Function &F);
@@ -86,11 +100,11 @@ namespace {
struct AllocaInfo {
/// The alloca to promote.
AllocaInst *AI;
-
+
/// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
/// looping and avoid redundant work.
SmallPtrSet<PHINode*, 8> CheckedPHIs;
-
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
@@ -104,19 +118,32 @@ namespace {
/// ever accessed, or false if the alloca is only accessed with mem
/// intrinsics or load/store that only access the entire alloca at once.
bool hasSubelementAccess : 1;
-
+
/// hasALoadOrStore - This is true if there are any loads or stores to it.
/// The alloca may just be accessed with memcpy, for example, which would
/// not set this.
bool hasALoadOrStore : 1;
-
+
explicit AllocaInfo(AllocaInst *ai)
: AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
hasSubelementAccess(false), hasALoadOrStore(false) {}
};
+    /// SRThreshold - The maximum alloca size to be considered for SROA.
unsigned SRThreshold;
+ /// StructMemberThreshold - The maximum number of members a struct can
+ /// contain to be considered for SROA.
+ unsigned StructMemberThreshold;
+
+ /// ArrayElementThreshold - The maximum number of elements an array can
+ /// have to be considered for SROA.
+ unsigned ArrayElementThreshold;
+
+ /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
+    /// converting to a scalar.
+ unsigned ScalarLoadThreshold;
+
void MarkUnsafe(AllocaInfo &I, Instruction *User) {
I.isUnsafe = true;
DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
@@ -155,19 +182,21 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
+ bool ShouldAttemptScalarRepl(AllocaInst *AI);
static MemTransferInst *isOnlyCopiedFromConstantGlobal(
AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
-
+
// SROA_DT - SROA that uses DominatorTree.
struct SROA_DT : public SROA {
static char ID;
public:
- SROA_DT(int T = -1) : SROA(T, true, ID) {
+ SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, true, ID, ST, AT, SLT) {
initializeSROA_DTPass(*PassRegistry::getPassRegistry());
}
-
+
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -175,22 +204,23 @@ namespace {
AU.setPreservesCFG();
}
};
-
+
// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp : public SROA {
static char ID;
public:
- SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
+ SROA(T, false, ID, ST, AT, SLT) {
initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
}
-
+
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
};
-
+
}
char SROA_DT::ID = 0;
@@ -209,10 +239,15 @@ INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
- bool UseDomTree) {
+ bool UseDomTree,
+ int StructMemberThreshold,
+ int ArrayElementThreshold,
+ int ScalarLoadThreshold) {
if (UseDomTree)
- return new SROA_DT(Threshold);
- return new SROA_SSAUp(Threshold);
+ return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
+ ScalarLoadThreshold);
+ return new SROA_SSAUp(Threshold, StructMemberThreshold,
+ ArrayElementThreshold, ScalarLoadThreshold);
}
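
With the extra thresholds the factory takes the alloca size limit, the DomTree flag, and the new struct-member, array-element and scalar-load limits, with -1 meaning "use the built-in default" as in the constructor above. A hedged usage sketch, assuming the legacy PassManager interface of this era and that the declared defaults mirror the constructor:

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static void addSROA(PassManager &PM) {
  // 128-byte allocas, use the dominator tree, at most 32 struct members and
  // 8 array elements, and no limit on the scalar integer load size.
  PM.add(createScalarReplAggregatesPass(128, true, 32, 8, -1));
}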
@@ -228,6 +263,7 @@ class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
+ unsigned ScalarLoadThreshold;
/// IsNotTrivial - This is set to true if there is some access to the object
/// which means that mem2reg can't promote it.
@@ -258,28 +294,38 @@ class ConvertToScalarInfo {
/// isn't possible to turn into a vector type, it gets set to VoidTy.
VectorType *VectorTy;
- /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
/// large integers unless there is some potential for optimization.
bool HadNonMemTransferAccess;
+  /// HadDynamicAccess - True if some element of this alloca was accessed
+  /// through a non-constant (dynamic) index.
+ /// We don't yet have support for turning a dynamic access into a large
+ /// integer.
+ bool HadDynamicAccess;
+
public:
- explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
- VectorTy(0), HadNonMemTransferAccess(false) { }
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td,
+ unsigned SLT)
+ : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
- bool CanConvertToScalar(Value *V, uint64_t Offset);
+ bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
- void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
+ Value *NonConstantIdx);
Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace.
@@ -290,7 +336,7 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
return 0;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
@@ -315,16 +361,27 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
NewTy = VectorTy; // Use the vector type.
} else {
unsigned BitWidth = AllocaSize * 8;
+
+ // Do not convert to scalar integer if the alloca size exceeds the
+ // scalar load threshold.
+ if (BitWidth > ScalarLoadThreshold)
+ return 0;
+
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
return 0;
+ // Dynamic accesses on integers aren't yet supported. They need us to shift
+ // by a dynamic amount which could be difficult to work out as we might not
+ // know whether to use a left or right shift.
+ if (ScalarKind == Integer && HadDynamicAccess)
+ return 0;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0);
+ ConvertUsesToScalar(AI, NewAI, 0, 0);
return NewAI;
}
@@ -411,7 +468,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
+ Value* NonConstantIdx) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
@@ -441,24 +499,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (!onlyUsedByLifetimeMarkers(BCI))
IsNotTrivial = true; // Can't be mem2reg'd.
- if (!CanConvertToScalar(BCI, Offset))
+ if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
return false;
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// If this is a GEP with a variable indices, we can't handle it.
- if (!GEP->hasAllConstantIndices())
+ PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
+ if (!PtrTy)
return false;
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->getPointerOperandType()->isPointerTy())
- return false;
- uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ Value *GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ if (!isa<VectorType>(PtrTy->getElementType()))
+ return false;
+ if (NonConstantIdx)
+ return false;
+ GEPNonConstantIdx = Indices.pop_back_val();
+ if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
+ return false;
+ HadDynamicAccess = true;
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
+ uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
Indices);
// See if all uses can be converted.
- if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
HadNonMemTransferAccess = true;
@@ -468,6 +537,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
// Store of constant value.
if (!isa<ConstantInt>(MSI->getValue()))
return false;
@@ -492,6 +564,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a memcpy or memmove into or out of the whole allocation, we
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
@@ -523,12 +598,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right. By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
- uint64_t Offset) {
+ uint64_t Offset,
+ Value* NonConstantIdx) {
while (!Ptr->use_empty()) {
Instruction *User = cast<Instruction>(Ptr->use_back());
if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
- ConvertUsesToScalar(CI, NewAI, Offset);
+ ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
CI->eraseFromParent();
continue;
}
@@ -536,9 +612,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ Value* GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ assert(!NonConstantIdx &&
+ "Dynamic GEP reading from dynamic GEP unsupported");
+ GEPNonConstantIdx = Indices.pop_back_val();
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -549,7 +632,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
- = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
+ NonConstantIdx, Builder);
LI->replaceAllUsesWith(NewLoadVal);
LI->eraseFromParent();
continue;
@@ -559,7 +643,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
- Builder);
+ NonConstantIdx, Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
@@ -574,6 +658,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
unsigned NumBytes = static_cast<unsigned>(SNumBytes);
@@ -590,7 +675,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, Builder);
+ Old, Offset, 0, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -606,6 +691,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
+ assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
@@ -678,7 +764,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder) {
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
@@ -700,7 +787,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ Value *V = Builder.CreateExtractElement(FromVal, Idx);
if (V->getType() != ToType)
V = Builder.CreateBitCast(V, ToType);
return V;
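
The net effect for a vector alloca indexed by a variable is that the constant element number derived from the offset (Elt) and the dynamic index are summed and fed to a single extractelement. A plain-C++ analogy of the index arithmetic (illustrative only; Vec stands in for the promoted <4 x float> value):

#include <cassert>
int main() {
  float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f}; // stands in for the <4 x float> alloca
  unsigned Elt = 2;             // constant element derived from Offset/EltSize
  unsigned NonConstantIdx = 1;  // the variable GEP index
  assert(Vec[Elt + NonConstantIdx] == 4.0f); // extract at Elt + NonConstantIdx
  return 0;
}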
@@ -709,23 +806,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (StructType *ST = dyn_cast<StructType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- Builder);
+ 0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, Builder);
+ Offset+i*EltSize, 0, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -791,9 +892,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
+///
+/// NonConstantIdx is an index value if there was a GEP with a non-constant
+/// index value. If this is 0 then all GEPs used to find this insert address
+/// are constant.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
- uint64_t Offset, IRBuilder<> &Builder) {
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
Type *AllocaType = Old->getType();
@@ -814,26 +920,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
SV = Builder.CreateBitCast(SV, EltTy);
uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
+ Value *Idx;
+ if (NonConstantIdx) {
+ if (Elt)
+ Idx = Builder.CreateAdd(NonConstantIdx,
+ Builder.getInt32(Elt),
+ "dyn.offset");
+ else
+ Idx = NonConstantIdx;
+ } else
+ Idx = Builder.getInt32(Elt);
+ return Builder.CreateInsertElement(Old, SV, Idx);
}
// If SV is a first-class aggregate value, insert each value recursively.
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into struct types not supported");
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- Builder);
+ 0, Builder);
}
return Old;
}
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ assert(!NonConstantIdx &&
+ "Dynamic indexing into array types not supported");
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
}
return Old;
}
@@ -935,7 +1055,7 @@ public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
: LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
-
+
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
@@ -950,18 +1070,18 @@ public:
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
- for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
DDI->eraseFromParent();
}
- for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
DVI->eraseFromParent();
}
}
-
+
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -970,7 +1090,7 @@ public:
}
virtual void updateDebugInfo(Instruction *Inst) const {
- for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
@@ -978,7 +1098,7 @@ public:
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
}
- for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
Value *Arg = NULL;
@@ -1021,12 +1141,12 @@ public:
static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
-
+
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
if (LI == 0 || !LI->isSimple()) return false;
-
+
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
@@ -1036,7 +1156,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
LI->getAlignment(), TD))
return false;
}
-
+
return true;
}
@@ -1067,20 +1187,20 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
if (LI == 0 || !LI->isSimple()) return false;
-
+
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
if (LI->getParent() != BB) return false;
-
+
// Ensure that there are no instructions between the PHI and the load that
// could store.
for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
MaxAlign = std::max(MaxAlign, LI->getAlignment());
}
-
+
// Okay, we know that we have one or more loads in the same block as the PHI.
// We can transform this if it is safe to push the loads into the predecessor
// blocks. The only thing to watch out for is that we can't put a possibly
@@ -1108,10 +1228,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
if (InVal->isDereferenceablePointer() ||
isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
continue;
-
+
return false;
}
-
+
return true;
}
@@ -1123,7 +1243,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
-
+
for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
UI != UE; ++UI) {
User *U = *UI;
@@ -1132,7 +1252,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
return false;
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
@@ -1146,7 +1266,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
Value *Result = SI->getOperand(1+CI->isZero());
SI->replaceAllUsesWith(Result);
SI->eraseFromParent();
-
+
// This is very rare and we just scrambled the use list of AI, start
// over completely.
return tryToMakeAllocaBePromotable(AI, TD);
@@ -1156,33 +1276,33 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// loads, then we can transform this by rewriting the select.
if (!isSafeSelectToSpeculate(SI, TD))
return false;
-
+
InstsToRewrite.insert(SI);
continue;
}
-
+
if (PHINode *PN = dyn_cast<PHINode>(U)) {
if (PN->use_empty()) { // Dead PHIs can be stripped.
InstsToRewrite.insert(PN);
continue;
}
-
+
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
if (!isSafePHIToSpeculate(PN, TD))
return false;
-
+
InstsToRewrite.insert(PN);
continue;
}
-
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (onlyUsedByLifetimeMarkers(BCI)) {
InstsToRewrite.insert(BCI);
continue;
}
}
-
+
return false;
}
@@ -1190,7 +1310,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// we're done!
if (InstsToRewrite.empty())
return true;
-
+
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
@@ -1211,13 +1331,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// loads with a new select.
while (!SI->use_empty()) {
LoadInst *LI = cast<LoadInst>(SI->use_back());
-
+
IRBuilder<> Builder(LI);
- LoadInst *TrueLoad =
+ LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
- LoadInst *FalseLoad =
+ LoadInst *FalseLoad =
Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
-
+
// Transfer alignment and TBAA info if present.
TrueLoad->setAlignment(LI->getAlignment());
FalseLoad->setAlignment(LI->getAlignment());
@@ -1225,18 +1345,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
}
-
+
Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
V->takeName(LI);
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
}
-
+
// Now that all the loads are gone, the select is gone too.
SI->eraseFromParent();
continue;
}
-
+
// Otherwise, we have a PHI node which allows us to push the loads into the
// predecessors.
PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
@@ -1244,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
PN->eraseFromParent();
continue;
}
-
+
Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
@@ -1254,18 +1374,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
-
+
// Rewrite all loads of the PN to use the new PHI.
while (!PN->use_empty()) {
LoadInst *LI = cast<LoadInst>(PN->use_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
-
+
// Inject loads into all of the pred blocks. Keep track of which blocks we
// insert them into in case we have multiple edges from the same block.
DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
@@ -1276,13 +1396,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
Load->setAlignment(Align);
if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}
-
+
NewPN->addIncoming(Load, Pred);
}
-
+
PN->eraseFromParent();
}
-
+
++NumAdjusted;
return true;
}
@@ -1315,7 +1435,7 @@ bool SROA::performPromotion(Function &F) {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
AllocaInst *AI = Allocas[i];
-
+
// Build list of instructions to promote.
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
@@ -1334,18 +1454,36 @@ bool SROA::performPromotion(Function &F) {
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
-static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
Type *T = AI->getAllocatedType();
- // Do not promote any struct into more than 32 separate vars.
+ // Do not promote any struct that has too many members.
if (StructType *ST = dyn_cast<StructType>(T))
- return ST->getNumElements() <= 32;
- // Arrays are much less likely to be safe for SROA; only consider
- // them if they are very small.
+ return ST->getNumElements() <= StructMemberThreshold;
+ // Do not promote any array that has too many elements.
if (ArrayType *AT = dyn_cast<ArrayType>(T))
- return AT->getNumElements() <= 8;
+ return AT->getNumElements() <= ArrayElementThreshold;
return false;
}
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
+}
+
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
@@ -1379,23 +1517,26 @@ bool SROA::performScalarRepl(Function &F) {
continue;
// Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global. If this is the case, we can change all users to use
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
// the constant global instead. This is commonly produced by the CFE by
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
+ if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ Copy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
}
// Check to see if we can perform the core SROA transformation. We cannot
@@ -1425,8 +1566,8 @@ bool SROA::performScalarRepl(Function &F) {
// promoted itself. If so, we don't want to transform it needlessly. Note
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
- if (AllocaInst *NewAI =
- ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
+ if (AllocaInst *NewAI = ConvertToScalarInfo(
+ (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1531,12 +1672,12 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
-
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
-
+
Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
@@ -1553,7 +1694,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (Info.isUnsafe) return;
}
}
-
+
/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
/// derived from the alloca, we can often still split the alloca into elements.
@@ -1570,10 +1711,10 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (PHINode *PN = dyn_cast<PHINode>(I))
if (!Info.CheckedPHIs.insert(PN))
return;
-
+
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
isSafePHISelectUseForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
@@ -1590,12 +1731,12 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
-
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
-
+
Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
@@ -1619,6 +1760,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
if (GEPIt == E)
return;
+ bool NonConstant = false;
+ unsigned NonConstantIdxSize = 0;
// Walk through the GEP type indices, checking the types that this indexes
// into.
@@ -1628,15 +1771,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal)
- return MarkUnsafe(Info, GEPI);
+ if (!IdxVal) {
+ // Non-constant GEPs are only a problem on arrays, structs, and pointers.
+ // Vectors can be dynamically indexed.
+ // FIXME: Add support for dynamic indexing on arrays. This should be
+ // ok on any subarrays of the alloca array, e.g., a[0][i] is ok, but a[i][0]
+ // isn't.
+ if (!(*GEPIt)->isVectorTy())
+ return MarkUnsafe(Info, GEPI);
+ NonConstant = true;
+ NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
+ }
}
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If this GEP is non-constant then the last operand must have been a
+ // dynamic index into a vector. Pop this now as it has no impact on the
+ // constant part of the offset.
+ if (NonConstant)
+ Indices.pop_back();
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
+ NonConstantIdxSize))
MarkUnsafe(Info, GEPI);
}
@@ -1741,6 +1899,12 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
+ } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
+ EltTy = VT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= VT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
} else {
return false;
}
@@ -1766,12 +1930,12 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteBitCast(BC, AI, Offset, NewElts);
continue;
}
-
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
continue;
}
-
+
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
@@ -1790,10 +1954,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
Type *LIType = LI->getType();
-
+
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
// %res = load { i32, i32 }* %alloc
@@ -1819,7 +1983,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
Type *SIType = Val->getType();
@@ -1846,16 +2010,16 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}
-
+
if (isa<SelectInst>(User) || isa<PHINode>(User)) {
- // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
// bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
// the new pointer.
if (!isa<AllocaInst>(I)) continue;
-
+
assert(Offset == 0 && NewElts[0] &&
"Direct alloca use should have a zero offset");
-
+
// If we have a use of the alloca, we know the derived uses will be
// utilizing just the first element of the scalarized result. Insert a
// bitcast of the first alloca before the user as required.
@@ -1908,9 +2072,16 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
Offset -= Layout->getElementOffset(Idx);
IdxTy = Type::getInt32Ty(T->getContext());
return Idx;
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
}
- ArrayType *AT = cast<ArrayType>(T);
- T = AT->getElementType();
+ VectorType *VT = cast<VectorType>(T);
+ T = VT->getElementType();
uint64_t EltSize = TD->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
@@ -1925,6 +2096,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ // If the GEP was dynamic then it must have been a dynamic vector lookup.
+ // In this case the dynamic index must be the last GEP operand, so keep it
+ // aside until we've found the constant GEP offset, then add it back in at
+ // the end.
+ Value* NonConstantIdx = 0;
+ if (!GEPI->hasAllConstantIndices())
+ NonConstantIdx = Indices.pop_back_val();
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
@@ -1951,6 +2129,17 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
}
+ if (NonConstantIdx) {
+ Type* GepTy = T;
+ // This GEP has a dynamic index. We need to add "i32 0" to index through
+ // any structs or arrays in the original type until we get to the vector
+ // to index.
+ while (!isa<VectorType>(GepTy)) {
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
+ }
+ NewArgs.push_back(NonConstantIdx);
+ }
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
@@ -2202,7 +2391,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
-
+
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
@@ -2464,7 +2653,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
}
-
+
return true;
}
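For context on the ScalarReplAggregates changes above: a minimal C++ sketch of the source pattern the new NonConstantIdx handling is aimed at, assuming Clang's ext_vector_type extension (the names and the example are illustrative, not taken from the patch):

  // Illustrative only: dynamic indexing into a small vector-typed alloca.
  typedef float float4 __attribute__((ext_vector_type(4)));

  float pick(float4 v, int i) {
    float4 tmp = v;   // lowered to an alloca of <4 x float>
    return tmp[i];    // GEP with a non-constant index into the vector; SROA can
                      // now rewrite this as an extractelement with a dynamic
                      // index instead of giving up on the alloca
  }
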
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index a66b3e3..d13e4ab 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -67,7 +67,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
// nodes.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
(*SI)->removePredecessor(BB);
-
+
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
if (UseLLVMTrap) {
@@ -77,7 +77,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
CallTrap->setDebugLoc(I->getDebugLoc());
}
new UnreachableInst(I->getContext(), I);
-
+
// All instructions after this are dead.
BasicBlock::iterator BBI = I, BBE = BB->end();
while (BBI != BBE) {
@@ -89,7 +89,6 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
/// ChangeToCall - Convert the specified invoke into a normal call.
static void ChangeToCall(InvokeInst *II) {
- BasicBlock *BB = II->getParent();
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
@@ -102,19 +101,19 @@ static void ChangeToCall(InvokeInst *II) {
BranchInst::Create(II->getNormalDest(), II);
// Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(BB);
- BB->getInstList().erase(II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
}
static bool MarkAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
+
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();
-
+
if (!Reachable.insert(BB))
continue;
@@ -136,7 +135,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
break;
}
}
-
+
// Store to undef and store to null are undefined and used to signal that
// they should be changed to unreachable by passes that can't modify the
// CFG.
@@ -145,7 +144,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (SI->isVolatile()) continue;
Value *Ptr = SI->getOperand(1);
-
+
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
@@ -157,11 +156,22 @@ static bool MarkAliveBlocks(BasicBlock *BB,
}
// Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
- ChangeToCall(II);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ ChangeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // Jump straight to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ ChangeToCall(II);
Changed = true;
}
+ }
Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
@@ -170,38 +180,38 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
static bool RemoveUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = MarkAliveBlocks(F.begin(), Reachable);
-
+
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
return Changed;
-
+
assert(Reachable.size() < F.size());
NumSimpl += F.size()-Reachable.size();
-
+
// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references...
for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
if (Reachable.count(BB))
continue;
-
+
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(BB);
BB->dropAllReferences();
}
-
+
for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
++I;
-
+
return true;
}
@@ -209,17 +219,17 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
/// node) return blocks, merge them together to promote recursive block merging.
static bool MergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
-
+
BasicBlock *RetBlock = 0;
-
+
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
BasicBlock &BB = *BBI++;
-
+
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
if (Ret == 0) continue;
-
+
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
@@ -241,21 +251,21 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlock = &BB;
continue;
}
-
+
// Otherwise, we found a duplicate return block. Merge the two.
Changed = true;
-
+
// Case when there is no input to the return or when the returned values
// agree is trivial. Note that they can't agree if there are phis in the
// blocks.
if (Ret->getNumOperands() == 0 ||
- Ret->getOperand(0) ==
+ Ret->getOperand(0) ==
cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
BB.replaceAllUsesWith(RetBlock);
BB.eraseFromParent();
continue;
}
-
+
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
if (RetBlockPHI == 0) {
@@ -264,12 +274,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
std::distance(PB, PE), "merge",
&RetBlock->front());
-
+
for (pred_iterator PI = PB; PI != PE; ++PI)
RetBlockPHI->addIncoming(InVal, *PI);
RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
}
-
+
// Turn BB into a block that just unconditionally branches to the return
// block. This handles the case when the two return blocks have a common
// predecessor but that return different things.
@@ -277,7 +287,7 @@ static bool MergeEmptyReturnBlocks(Function &F) {
BB.getTerminator()->eraseFromParent();
BranchInst::Create(RetBlock, &BB);
}
-
+
return Changed;
}
@@ -288,7 +298,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
-
+
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
@@ -317,7 +327,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
// IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
// RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
- // avoid reruning IterativeSimplifyCFG if the second pass of
+ // avoid rerunning IterativeSimplifyCFG if the second pass of
// RemoveUnreachableBlocksFromFn doesn't do anything.
if (!RemoveUnreachableBlocksFromFn(F))
return true;
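For context on the MarkAliveBlocks change above: invokes of a null or undef callee now become unreachable, and invokes of nounwind callees become calls, or just a branch when the result is unused and the callee only reads memory. A rough C++ sketch of that last case (illustrative; the attribute spelling is an assumption, and in practice the nounwind/readonly facts usually only surface after inlining or attribute deduction, so the front end still emits an invoke):

  // Illustrative only.
  extern int lookup(int) __attribute__((nothrow, pure));   // nounwind + readonly

  int caller(int x) {
    try {
      lookup(x);   // invoke with an unused result of a nounwind, readonly
                   // callee: replaced by a plain branch to the normal
                   // destination, and the unwind edge loses this predecessor
      return 0;
    } catch (...) {
      return -1;
    }
  }
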
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index f7b6941..f110320 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -18,20 +18,20 @@
#define DEBUG_TYPE "simplify-libcalls"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
using namespace llvm;
@@ -100,7 +100,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}
-
+
static bool CallHasFloatingPointArgument(const CallInst *CI) {
for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
it != e; ++it) {
@@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitStrLenMemCpy(Src, Dst, Len, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, Len, B);
}
- void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD);
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization {
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
}
};
@@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt {
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
- EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
}
};
@@ -254,9 +255,9 @@ struct StrChrOpt : public LibCallOptimization {
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD);
+ B, TD, TLI);
}
-
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
@@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD);
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
return 0;
}
@@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization {
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD);
+ std::min(Len1, Len2)), B, TD, TLI);
}
return 0;
@@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall ||
+ !EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD, TLI))
B.CreateMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
@@ -459,6 +459,51 @@ struct StrCpyOpt : public LibCallOptimization {
};
//===---------------------------------------===//
+// 'stpcpy' Optimizations
+
+struct StpCpyOpt : public LibCallOptimization {
+ bool OptChkCall; // True if it's optimizing a __stpcpy_chk libcall.
+
+ StpCpyOpt(bool c) : OptChkCall(c) {}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "stpcpy" function prototype.
+ unsigned NumParams = OptChkCall ? 3 : 2;
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != NumParams ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len);
+ Value *DstEnd = B.CreateGEP(Dst,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ Len - 1));
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
+ TD, TLI))
+ B.CreateMemCpy(Dst, Src, LenV, 1);
+ return DstEnd;
+ }
+};
+
+//===---------------------------------------===//
// 'strncpy' Optimizations
struct StrNCpyOpt : public LibCallOptimization {
@@ -565,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// strpbrk(s, "a") -> strchr(s, 'a')
if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
return 0;
}
@@ -654,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization {
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD);
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
return 0;
}
@@ -678,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD);
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
@@ -716,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization {
}
// fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
- ToFindStr[0], B, TD), CI->getType());
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
return 0;
}
};
@@ -1135,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1147,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- EmitPutS(GV, B, TD);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
- if (CI->use_empty()) return CI;
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- EmitPutS(CI->getArgOperand(1), B, TD);
- return CI;
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
}
return 0;
}
@@ -1253,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1320,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- EmitFPutC(Char, CI->getArgOperand(3), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
return 0;
@@ -1346,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization {
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
- return CI; // Known to have no uses (see above).
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1373,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return ConstantInt::get(CI->getType(), FormatStr.size());
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1390,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
- return CI;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
return 0;
}
@@ -1450,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1470,12 +1522,15 @@ namespace {
///
class SimplifyLibCalls : public FunctionPass {
TargetLibraryInfo *TLI;
-
+
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
- StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
- StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
+ StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
+ StrNCpyOpt StrNCpy;
+ StrLenOpt StrLen; StrPBrkOpt StrPBrk;
StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
@@ -1487,11 +1542,12 @@ namespace {
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
PutsOpt Puts;
-
+
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
+ StpCpy(false), StpCpyChk(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
@@ -1542,6 +1598,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
+ Optimizations["stpcpy"] = &StpCpy;
Optimizations["strlen"] = &StrLen;
Optimizations["strpbrk"] = &StrPBrk;
Optimizations["strtol"] = &StrTo;
@@ -1561,6 +1618,7 @@ void SimplifyLibCalls::InitOptimizations() {
// _chk variants of String and Memory LibCall Optimizations.
Optimizations["__strcpy_chk"] = &StrCpyChk;
+ Optimizations["__stpcpy_chk"] = &StpCpyChk;
// Math Library Optimizations
Optimizations["cosf"] = &Cos;
@@ -1717,7 +1775,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
FunctionType *FTy = F.getFunctionType();
-
+
StringRef Name = F.getName();
switch (Name[0]) {
case 's':
@@ -1746,6 +1804,7 @@ void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
Name == "strtold" ||
Name == "strncat" ||
Name == "strncpy" ||
+ Name == "stpncpy" ||
Name == "strtoull") {
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(1)->isPointerTy())
@@ -2406,10 +2465,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// stpcpy:
-// * stpcpy(str, "literal") ->
-// llvm.memcpy(str,"literal",strlen("literal")+1,1)
-//
// strchr:
// * strchr(p, 0) -> strlen(p)
// tan, tanf, tanl:
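The new StpCpyOpt above mirrors StrCpyOpt but returns a pointer to the copied nul terminator, per the usual stpcpy semantics. A small caller-side sketch of the two folds it performs (illustrative, not from the patch):

  // Illustrative only.
  #include <string.h>

  char *copy_tag(char *dst) {
    // Constant source of known length: becomes memcpy(dst, "tag:", 5), and the
    // call's result becomes dst + 4, a pointer to the copied nul byte.
    return stpcpy(dst, "tag:");
  }

  char *skip_to_end(char *s) {
    // stpcpy(x, x) is folded to x + strlen(x).
    return stpcpy(s, s);
  }
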
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index ef65c0a..34f1d6c 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -27,6 +27,7 @@
using namespace llvm;
STATISTIC(NumSunk, "Number of instructions sunk");
+STATISTIC(NumSinkIter, "Number of sinking iterations");
namespace {
class Sinking : public FunctionPass {
@@ -39,9 +40,9 @@ namespace {
Sinking() : FunctionPass(ID) {
initializeSinkingPass(*PassRegistry::getPassRegistry());
}
-
+
virtual bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
@@ -55,9 +56,10 @@ namespace {
bool ProcessBlock(BasicBlock &BB);
bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
};
} // end anonymous namespace
-
+
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -69,7 +71,7 @@ FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified value
/// occur in blocks dominated by the specified block.
-bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
BasicBlock *BB) const {
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
@@ -98,20 +100,19 @@ bool Sinking::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- bool EverMadeChange = false;
-
- while (1) {
- bool MadeChange = false;
+ bool MadeChange, EverMadeChange = false;
+ do {
+ MadeChange = false;
+ DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
// Process all basic blocks.
- for (Function::iterator I = F.begin(), E = F.end();
+ for (Function::iterator I = F.begin(), E = F.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
-
- // If this iteration over the code changed anything, keep iterating.
- if (!MadeChange) break;
- EverMadeChange = true;
- }
+ EverMadeChange |= MadeChange;
+ NumSinkIter++;
+ } while (MadeChange);
+
return EverMadeChange;
}
@@ -120,8 +121,8 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
- // unprofitable, it can also lead to infinite looping, because in an unreachable
- // loop there may be nowhere to stop.
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
if (!DT->isReachableFromEntry(&BB)) return false;
bool MadeChange = false;
@@ -133,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
SmallPtrSet<Instruction *, 8> Stores;
do {
Instruction *Inst = I; // The instruction to sink.
-
+
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
ProcessedBegin = I == BB.begin();
@@ -145,10 +146,10 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
if (SinkInstruction(Inst, Stores))
++NumSunk, MadeChange = true;
-
+
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
-
+
return MadeChange;
}
@@ -174,6 +175,45 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return true;
}
+/// IsAcceptableTarget - Return true if it is possible to sink the instruction
+/// in the specified basic block.
+bool Sinking::IsAcceptableTarget(Instruction *Inst,
+ BasicBlock *SuccToSinkTo) const {
+ assert(Inst && "Instruction to be sunk is null");
+ assert(SuccToSinkTo && "Candidate sink target is null");
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ // If the block has multiple predecessors, this would introduce computation
+ // on different code paths. We could split the critical edge, but for now we
+ // just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!isSafeToSpeculativelyExecute(Inst))
+ return false;
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
+ return false;
+
+ // Don't sink instructions into a loop.
+ Loop *succ = LI->getLoopFor(SuccToSinkTo);
+ Loop *cur = LI->getLoopFor(Inst->getParent());
+ if (succ != 0 && succ != cur)
+ return false;
+ }
+
+ // Finally, check that all the uses of the instruction are actually
+ // dominated by the candidate block.
+ return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool Sinking::SinkInstruction(Instruction *Inst,
@@ -181,7 +221,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// Check if it's safe to move the instruction.
if (!isSafeToMove(Inst, AA, Stores))
return false;
-
+
// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges. We often get
// instructions sunk into the top of a large block, but it would be better to
@@ -189,86 +229,42 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
// and z and only shrink the live range of x.
-
- // Loop over all the operands of the specified instruction. If there is
- // anything we can't handle, bail out.
- BasicBlock *ParentBlock = Inst->getParent();
-
+
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
BasicBlock *SuccToSinkTo = 0;
-
- // FIXME: This picks a successor to sink into based on having one
- // successor that dominates all the uses. However, there are cases where
- // sinking can happen but where the sink point isn't a successor. For
- // example:
- // x = computation
- // if () {} else {}
- // use x
- // the instruction could be sunk over the whole diamond for the
- // if/then/else (or loop, etc), allowing it to be sunk into other blocks
- // after that.
-
+
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
- // Look at all the successors and decide which one
- // we should sink to.
- for (succ_iterator SI = succ_begin(ParentBlock),
- E = succ_end(ParentBlock); SI != E; ++SI) {
- if (AllUsesDominatedByBlock(Inst, *SI)) {
- SuccToSinkTo = *SI;
- break;
- }
+ // Look at all the postdominators and see if we can sink it in one.
+ DomTreeNode *DTN = DT->getNode(Inst->getParent());
+ for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
+ I != E && SuccToSinkTo == 0; ++I) {
+ BasicBlock *Candidate = (*I)->getBlock();
+ if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
+ IsAcceptableTarget(Inst, Candidate))
+ SuccToSinkTo = Candidate;
+ }
+
+ // If no suitable postdominator was found, look at all the successors and
+ // decide which one we should sink to, if any.
+ for (succ_iterator I = succ_begin(Inst->getParent()),
+ E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ if (IsAcceptableTarget(Inst, *I))
+ SuccToSinkTo = *I;
}
-
+
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
return false;
-
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (Inst->getParent() == SuccToSinkTo)
- return false;
-
- DEBUG(dbgs() << "Sink instr " << *Inst);
- DEBUG(dbgs() << "to block ";
- WriteAsOperand(dbgs(), SuccToSinkTo, false));
-
- // If the block has multiple predecessors, this would introduce computation on
- // a path that it doesn't already exist. We could split the critical edge,
- // but for now we just punt.
- // FIXME: Split critical edges if not backedges.
- if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
- // We cannot sink a load across a critical edge - there may be stores in
- // other code paths.
- if (!isSafeToSpeculativelyExecute(Inst)) {
- DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
- return false;
- }
- // We don't want to sink across a critical edge if we don't dominate the
- // successor. We could be introducing calculations to new code paths.
- if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
- return false;
- }
-
- // Don't sink instructions into a loop.
- if (LI->isLoopHeader(SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
- return false;
- }
+ DEBUG(dbgs() << "Sink" << *Inst << " (";
+ WriteAsOperand(dbgs(), Inst->getParent(), false);
+ dbgs() << " -> ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false);
+ dbgs() << ")\n");
- // Otherwise we are OK with sinking along a critical edge.
- DEBUG(dbgs() << "Sinking along critical edge.\n");
- }
-
- // Determine where to insert into. Skip phi nodes.
- BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
- while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
- ++InsertPos;
-
// Move the instruction.
- Inst->moveBefore(InsertPos);
+ Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
return true;
}
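The rewritten SinkInstruction above searches dominator-tree children whose immediate dominator is the defining block, not just CFG successors, which covers the diamond case the removed FIXME described. An illustrative C++ shape (names are made up):

  // Illustrative only.
  extern void g();
  extern void h();

  int f(int a, int b, bool p) {
    int x = a * b;          // defined up front but unused in the diamond below
    if (p) g(); else h();   // neither arm touches x
    return x;               // the only use; this block is not a successor of the
                            // definition's block, but it is a dominator-tree
                            // child of it, so the multiply can now be sunk here
  }
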
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index e21eb9d..6557d63 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -172,7 +172,7 @@ bool TailCallElim::runOnFunction(Function &F) {
FunctionContainsEscapingAllocas |=
CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
}
-
+
/// FIXME: The code generator produces really bad code when an 'escaping
/// alloca' is changed from being a static alloca to being a dynamic alloca.
/// Until this is resolved, disable this transformation if that would ever
@@ -234,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
-
+
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
@@ -364,7 +364,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
if (&BB->front() == TI) // Make sure there is something before the terminator.
return 0;
-
+
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = 0;
@@ -388,10 +388,10 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
- if (BB == &F->getEntryBlock() &&
+ if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
- callIsSmall(F)) {
+ callIsSmall(CI)) {
// A single-block function with just a call and a return. Check that
// the arguments match.
CallSite::arg_iterator I = CallSite(CI).arg_begin(),
@@ -432,7 +432,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock::iterator BBI = CI;
for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
-
+
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
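For context on the hunk above: "accumulator recursion elimination" covers the case where the operation between the recursive call and the return is associative and commutative, the classic shape being (illustrative, not from the patch):

  // Illustrative only.
  long fact(long n) {
    if (n <= 1)
      return 1;
    return n * fact(n - 1);   // the multiply sits between the recursive call
                              // and the return; because '*' is associative and
                              // commutative it can be rewritten with an
                              // accumulator, turning the recursion into a loop
  }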