author    dim <dim@FreeBSD.org>    2011-06-12 15:42:51 +0000
committer dim <dim@FreeBSD.org>    2011-06-12 15:42:51 +0000
commit    ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree      b3032e51d630e8070e9e08d6641648f195316a80 /lib/Analysis
parent    2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
Vendor import of llvm trunk r132879:
http://llvm.org/svn/llvm-project/llvm/trunk@132879
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/Analysis.cpp                 |   1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp       | 143
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp    | 357
-rw-r--r--  lib/Analysis/CMakeLists.txt               |   1
-rw-r--r--  lib/Analysis/ConstantFolding.cpp          |   8
-rw-r--r--  lib/Analysis/DIBuilder.cpp                |   8
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp            |   2
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp     |   4
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp        |   4
-rw-r--r--  lib/Analysis/IVUsers.cpp                  |  33
-rw-r--r--  lib/Analysis/InlineCost.cpp               |  19
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp      | 250
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp            |  10
-rw-r--r--  lib/Analysis/Loads.cpp                    |   4
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp |  90
-rw-r--r--  lib/Analysis/RegionPass.cpp               |   2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp          | 137
-rw-r--r--  lib/Analysis/ValueTracking.cpp            |  16
18 files changed, 920 insertions(+), 169 deletions(-)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 6ebe100..e57ba78 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeAliasSetPrinterPass(Registry);
initializeNoAAPass(Registry);
initializeBasicAliasAnalysisPass(Registry);
+ initializeBranchProbabilityInfoPass(Registry);
initializeCFGViewerPass(Registry);
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f1bb8a3..8330ea7 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -281,17 +281,20 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
continue;
}
- if (const Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Get a DominatorTree and use it here.
- if (const Value *Simplified =
- SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
- V = Simplified;
- continue;
- }
-
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
- if (GEPOp == 0)
+ if (GEPOp == 0) {
+ // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+ // can come up with something. This matches what GetUnderlyingObject does.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
+
return V;
+ }
// Don't attempt to analyze GEPs over unsized objects.
if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
@@ -448,7 +451,13 @@ namespace {
/// BasicAliasAnalysis - This is the primary alias analysis implementation.
struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : ImmutablePass(ID) {
+ BasicAliasAnalysis() : ImmutablePass(ID),
+ // AliasCache rarely has more than 1 or 2 elements,
+ // so start it off fairly small so that clear()
+ // doesn't have to tromp through 64 (the default)
+ // elements on each alias query. This really wants
+ // something like a SmallDenseMap.
+ AliasCache(8) {
initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
@@ -462,12 +471,12 @@ namespace {
virtual AliasResult alias(const Location &LocA,
const Location &LocB) {
- assert(Visited.empty() && "Visited must be cleared after use!");
+ assert(AliasCache.empty() && "AliasCache must be cleared after use!");
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
LocB.Ptr, LocB.Size, LocB.TBAATag);
- Visited.clear();
+ AliasCache.clear();
return Alias;
}
@@ -503,7 +512,12 @@ namespace {
}
private:
- // Visited - Track instructions visited by aliasPHI(), aliasSelect(), and aliasGEP().
+ // AliasCache - Track alias queries to guard against recursion.
+ typedef std::pair<Location, Location> LocPair;
+ typedef DenseMap<LocPair, AliasResult> AliasCacheTy;
+ AliasCacheTy AliasCache;
+
+ // Visited - Track instructions visited by pointsToConstantMemory.
SmallPtrSet<const Value*, 16> Visited;
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
@@ -680,9 +694,12 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
unsigned ArgNo = 0;
for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
- // Only look at the no-capture pointer arguments.
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
+ (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
+ !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -816,13 +833,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const MDNode *V2TBAAInfo,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
- // If this GEP has been visited before, we're on a use-def cycle.
- // Such cycles are only valid when PHI nodes are involved or in unreachable
- // code. The visitPHI function catches cycles containing PHIs, but there
- // could still be a cycle without PHIs in unreachable code.
- if (!Visited.insert(GEP1))
- return MayAlias;
-
int64_t GEP1BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
@@ -940,7 +950,30 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return NoAlias;
}
- return MayAlias;
+ // Statically, we can see that the base objects are the same, but the
+ // pointers have dynamic offsets which we can't resolve. And none of our
+ // little tricks above worked.
+ //
+ // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+ // practical effect of this is protecting TBAA in the case of dynamic
+ // indices into arrays of unions. An alternative way to solve this would
+ // be to have clang emit extra metadata for unions and/or union accesses.
+ // A union-specific solution wouldn't handle the problem for malloc'd
+ // memory however.
+ return PartialAlias;
+}
+
+static AliasAnalysis::AliasResult
+MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+ // If the results agree, take it.
+ if (A == B)
+ return A;
+ // A mix of PartialAlias and MustAlias is PartialAlias.
+ if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
+ (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
+ return AliasAnalysis::PartialAlias;
+ // Otherwise, we don't know anything.
+ return AliasAnalysis::MayAlias;
}
/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
@@ -950,13 +983,6 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
const MDNode *SITBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
- // If this select has been visited before, we're on a use-def cycle.
- // Such cycles are only valid when PHI nodes are involved or in unreachable
- // code. The visitPHI function catches cycles containing PHIs, but there
- // could still be a cycle without PHIs in unreachable code.
- if (!Visited.insert(SI))
- return MayAlias;
-
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -969,9 +995,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
AliasResult ThisAlias =
aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
SI2->getFalseValue(), V2Size, V2TBAAInfo);
- if (ThisAlias != Alias)
- return MayAlias;
- return Alias;
+ return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
@@ -981,16 +1005,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
if (Alias == MayAlias)
return MayAlias;
- // If V2 is visited, the recursive case will have been caught in the
- // above aliasCheck call, so these subsequent calls to aliasCheck
- // don't need to assume that V2 is being visited recursively.
- Visited.erase(V2);
-
AliasResult ThisAlias =
aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
- if (ThisAlias != Alias)
- return MayAlias;
- return Alias;
+ return MergeAliasResults(ThisAlias, Alias);
}
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
@@ -1000,10 +1017,6 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
const MDNode *PNTBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
- // The PHI node has already been visited, avoid recursion any further.
- if (!Visited.insert(PN))
- return MayAlias;
-
// If the values are PHIs in the same block, we can do a more precise
// as well as efficient check: just check for aliases between the values
// on corresponding edges.
@@ -1020,8 +1033,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
V2Size, V2TBAAInfo);
- if (ThisAlias != Alias)
- return MayAlias;
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
}
return Alias;
}
@@ -1052,15 +1066,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- // If V2 is visited, the recursive case will have been caught in the
- // above aliasCheck call, so these subsequent calls to aliasCheck
- // don't need to assume that V2 is being visited recursively.
- Visited.erase(V2);
-
AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
V, PNSize, PNTBAAInfo);
- if (ThisAlias != Alias || ThisAlias == MayAlias)
- return MayAlias;
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
}
return Alias;
@@ -1145,6 +1155,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
+ // Check the cache before climbing up use-def chains. This also terminates
+ // otherwise infinitely recursive queries.
+ LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ if (V1 > V2)
+ std::swap(Locs.first, Locs.second);
+ std::pair<AliasCacheTy::iterator, bool> Pair =
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
+ if (!Pair.second)
+ return Pair.first->second;
+
// FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
// GEP can't simplify, we don't even look at the PHI cases.
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
@@ -1154,7 +1175,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
- if (Result != MayAlias) return Result;
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1164,7 +1185,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
V2, V2Size, V2TBAAInfo);
- if (Result != MayAlias) return Result;
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1174,7 +1195,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
V2, V2Size, V2TBAAInfo);
- if (Result != MayAlias) return Result;
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
}
// If both pointers are pointing into the same object and one of them
@@ -1183,8 +1204,10 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if (TD && O1 == O2)
if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
(V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
- return PartialAlias;
+ return AliasCache[Locs] = PartialAlias;
- return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ AliasResult Result =
+ AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ return AliasCache[Locs] = Result;
}
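
The caching scheme introduced in this hunk follows a classic memoized-recursion shape: canonicalize the query pair, seed the cache entry with the conservative MayAlias before recursing (so a query that cycles back onto itself terminates instead of recursing forever), and overwrite the entry with the final answer on every return path. A minimal standalone sketch of that shape, using plain standard-library types in place of LLVM's Location and DenseMap; all names here are illustrative, not LLVM's:

#include <algorithm>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

typedef std::pair<const void *, const void *> LocPair;
static std::map<LocPair, AliasResult> AliasCache;

AliasResult aliasCheck(const void *V1, const void *V2) {
  // Canonicalize the pair so (A, B) and (B, A) share one cache entry,
  // mirroring the "if (V1 > V2) std::swap(...)" in the diff.
  LocPair Locs(V1, V2);
  if (V1 > V2)
    std::swap(Locs.first, Locs.second);

  // Seed the entry with the conservative answer. A recursive query that
  // loops back onto this pair returns MayAlias instead of diverging.
  std::pair<std::map<LocPair, AliasResult>::iterator, bool> Ins =
      AliasCache.insert(std::make_pair(Locs, MayAlias));
  if (!Ins.second)
    return Ins.first->second;

  AliasResult Result = MayAlias; // ... the real analysis would run here ...
  return AliasCache[Locs] = Result; // record the final answer on the way out
}
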
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 0000000..812fac0
--- /dev/null
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,357 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfo::ID = 0;
+
+
+// Please note that BranchProbabilityAnalysis is not a FunctionPass.
+// It is created by BranchProbabilityInfo (which is a FunctionPass), which
+// provides a clear interface. Thanks to that, all heuristics and other
+// private methods are hidden in the .cpp file.
+class BranchProbabilityAnalysis {
+
+ typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+
+ DenseMap<Edge, uint32_t> *Weights;
+
+ BranchProbabilityInfo *BP;
+
+ LoopInfo *LI;
+
+
+ // Weights are for internal use only. They are used by heuristics to help
+ // estimate edges' probabilities. Example:
+ //
+ // Using "Loop Branch Heuristics" we predict weights of edges for the
+ // block BB2.
+ // ...
+ // |
+ // V
+ // BB1<-+
+ // | |
+ // | | (Weight = 128)
+ // V |
+ // BB2--+
+ // |
+ // | (Weight = 4)
+ // V
+ // BB3
+ //
+ // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
+ // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+
+ static const uint32_t LBH_TAKEN_WEIGHT = 128;
+ static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+ // Standard weight value. Used when none of the heuristics set weight for
+ // the edge.
+ static const uint32_t NORMAL_WEIGHT = 16;
+
+ // Minimum weight of an edge. Please note that weight is NEVER 0.
+ static const uint32_t MIN_WEIGHT = 1;
+
+ // Return TRUE if BB leads directly to a Return Instruction.
+ static bool isReturningBlock(BasicBlock *BB) {
+ SmallPtrSet<BasicBlock *, 8> Visited;
+
+ while (true) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ return true;
+
+ if (TI->getNumSuccessors() > 1)
+ break;
+
+ // It is an unreachable block, which we can consider a return instruction.
+ if (TI->getNumSuccessors() == 0)
+ return true;
+
+ Visited.insert(BB);
+ BB = TI->getSuccessor(0);
+
+ // Stop if cycle is detected.
+ if (Visited.count(BB))
+ return false;
+ }
+
+ return false;
+ }
+
+ // Multiply Edge Weight by two.
+ void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+ uint32_t MaxWeight = getMaxWeightFor(Src);
+
+ if (Weight * 2 > MaxWeight)
+ BP->setEdgeWeight(Src, Dst, MaxWeight);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight * 2);
+ }
+
+ // Divide Edge Weight by two.
+ void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+
+ assert(Weight > 0);
+ if (Weight / 2 < MIN_WEIGHT)
+ BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight / 2);
+ }
+
+
+ uint32_t getMaxWeightFor(BasicBlock *BB) const {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+ }
+
+public:
+ BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
+ BranchProbabilityInfo *BP, LoopInfo *LI)
+ : Weights(W), BP(BP), LI(LI) {
+ }
+
+ // Return Heuristics
+ void calcReturnHeuristics(BasicBlock *BB);
+
+ // Pointer Heuristics
+ void calcPointerHeuristics(BasicBlock *BB);
+
+ // Loop Branch Heuristics
+ void calcLoopBranchHeuristics(BasicBlock *BB);
+
+ bool runOnFunction(Function &F);
+};
+
+// Calculate Edge Weights using "Return Heuristics". Predict that a successor
+// which leads directly to a return instruction will not be taken.
+void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+ if (BB->getTerminator()->getNumSuccessors() == 1)
+ return;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ if (isReturningBlock(Succ)) {
+ decEdgeWeight(BB, Succ);
+ }
+ }
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison
+// between two pointers, or between a pointer and NULL, will fail.
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return;
+
+ Value *LHS = CI->getOperand(0);
+
+ if (!LHS->getType()->isPointerTy())
+ return;
+
+ assert(CI->getOperand(1)->getType()->isPointerTy());
+
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
+
+ // p != 0 -> isProb = true
+ // p == 0 -> isProb = false
+ // p != q -> isProb = true
+ // p == q -> isProb = false;
+ bool isProb = !CI->isEquality();
+ if (!isProb)
+ std::swap(Taken, NonTaken);
+
+ incEdgeWeight(BB, Taken);
+ decEdgeWeight(BB, NonTaken);
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
+void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+ uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
+
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ SmallVector<BasicBlock *, 8> BackEdges;
+ SmallVector<BasicBlock *, 8> ExitingEdges;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ Loop *SuccL = LI->getLoopFor(Succ);
+ if (SuccL != L)
+ ExitingEdges.push_back(Succ);
+ else if (Succ == L->getHeader())
+ BackEdges.push_back(Succ);
+ }
+
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
+ if (backWeight < NORMAL_WEIGHT)
+ backWeight = NORMAL_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+ EE = BackEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Back = *EI;
+ BP->setEdgeWeight(BB, Back, backWeight);
+ }
+ }
+
+ uint32_t numExitingEdges = ExitingEdges.size();
+ if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+ if (exitWeight < MIN_WEIGHT)
+ exitWeight = MIN_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+ EE = ExitingEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Exiting = *EI;
+ BP->setEdgeWeight(BB, Exiting, exitWeight);
+ }
+ }
+}
+
+bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // Only LBH uses setEdgeWeight method.
+ calcLoopBranchHeuristics(BB);
+
+ // PH and RH use only the incEdgeWeight and decEdgeWeight methods so as
+ // not to efface LBH results.
+ calcPointerHeuristics(BB);
+ calcReturnHeuristics(BB);
+ }
+
+ return false;
+}
+
+
+bool BranchProbabilityInfo::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ BranchProbabilityAnalysis BPA(&Weights, this, &LI);
+ return BPA.runOnFunction(F);
+}
+
+uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ BasicBlock *MaxSucc = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+// Return the edge's weight. If we can't find it, return the DEFAULT_WEIGHT
+// value.
+uint32_t
+BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+ Edge E(Src, Dst);
+ DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
+
+ if (I != Weights.end())
+ return I->second;
+
+ return DEFAULT_WEIGHT;
+}
+
+void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
+ uint32_t Weight) {
+ Weights[std::make_pair(Src, Dst)] = Weight;
+ DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
+ << Dst->getNameStr() << " weight to " << Weight
+ << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
+}
+
+
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
+ BasicBlock *Dst) const {
+ BranchProbability Prob = getEdgeProbability(Src, Dst);
+
+ OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
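
For a concrete feel of the weight arithmetic in this new file: with the loop-branch weights above, a lone back edge gets weight 128 and a lone exiting edge weight 4, so the probabilities come out to 128/132 and 4/132, and the hotness test is performed in 64-bit integer arithmetic to avoid both overflow and floating point. A small self-contained check of those numbers (plain C++, no LLVM types):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t LBH_TAKEN_WEIGHT = 128;  // lone back edge
  const uint32_t LBH_NONTAKEN_WEIGHT = 4; // lone exiting edge
  uint32_t Sum = LBH_TAKEN_WEIGHT + LBH_NONTAKEN_WEIGHT;

  // Matches the worked example in the file's comment block:
  // 128 / 132 ~= 0.9696 and 4 / 132 ~= 0.0303.
  std::printf("P(back) = %f\n", (double)LBH_TAKEN_WEIGHT / Sum);
  std::printf("P(exit) = %f\n", (double)LBH_NONTAKEN_WEIGHT / Sum);

  // isEdgeHot tests Weight/Sum > 4/5 without floating point or overflow:
  bool Hot = (uint64_t)LBH_TAKEN_WEIGHT * 5 > (uint64_t)Sum * 4;
  std::printf("back edge hot: %d\n", (int)Hot);
  return 0;
}
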
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 6be5617..1a975bf 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis
AliasSetTracker.cpp
Analysis.cpp
BasicAliasAnalysis.cpp
+ BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
ConstantFolding.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5de2b04..08a6065 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1085,7 +1085,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
case 'c':
return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
case 'e':
- return Name == "exp";
+ return Name == "exp" || Name == "exp2";
case 'f':
return Name == "fabs" || Name == "fmod" || Name == "floor";
case 'l':
@@ -1221,6 +1221,12 @@ llvm::ConstantFoldCall(Function *F,
case 'e':
if (Name == "exp")
return ConstantFoldFP(exp, V, Ty);
+
+ if (Name == "exp2") {
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ }
break;
case 'f':
if (Name == "fabs")
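
The exp2 fold above leans on the identity exp2(x) = pow(2, x), which holds for these double-precision evaluations up to normal libm rounding. A quick sanity check of the identity; this sketch assumes a C++11 host where exp2 is available, which is exactly what the fold avoids requiring of the LLVM build host:

#include <cassert>
#include <cmath>

int main() {
  const double Xs[] = { -3.5, -1.0, 0.0, 0.5, 10.0 };
  for (double x : Xs) {
    // ConstantFoldCall evaluates exp2(x) as pow(2.0, x) so the build host
    // doesn't need C99's exp2(); the two agree to within rounding.
    assert(std::fabs(std::pow(2.0, x) - std::exp2(x)) < 1e-9);
  }
  return 0;
}
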
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index dc98c9e..ef5d03a 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -51,6 +51,10 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
};
TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+ // Create a named metadata node so that it is easier to find the compile
+ // unit in a module.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(TheCU);
}
/// createFile - Create a file descriptor to hold debugging information
@@ -156,12 +160,12 @@ DIType DIBuilder::createReferenceType(DIType RTy) {
/// createTypedef - Create debugging information entry for a typedef.
DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo) {
+ unsigned LineNo, DIDescriptor Context) {
// typedefs are encoded in DIDerivedType format.
assert(Ty.Verify() && "Invalid typedef type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
- Ty.getContext(),
+ Context,
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
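
The llvm.dbg.cu named metadata created above gives tools a direct index of compile units instead of a scan over all metadata. A sketch of the consumer side, using the Module and NamedMDNode accessors of this era; the function name is ours, not part of the diff:

#include "llvm/Metadata.h"
#include "llvm/Module.h"

// Sketch: walk the named metadata that createCompileUnit now populates.
void countCompileUnits(llvm::Module &M) {
  if (llvm::NamedMDNode *CUs = M.getNamedMetadata("llvm.dbg.cu"))
    for (unsigned i = 0, e = CUs->getNumOperands(); i != e; ++i)
      (void)CUs->getOperand(i); // each operand is a compile unit node
}
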
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 690c4b4..2e79eab 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -148,7 +148,7 @@ private:
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
CallSite CS(cast<Value>(II));
- if (CS && !isa<DbgInfoIntrinsic>(II)) {
+ if (CS && !isa<IntrinsicInst>(II)) {
const Function *Callee = CS.getCalledFunction();
if (Callee)
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 725ab72..659ffab 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS(cast<Value>(I));
- if (!CS || isa<DbgInfoIntrinsic>(I)) continue;
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<IntrinsicInst>(I)) continue;
// If this call site already existed in the callgraph, just verify it
// matches up to expectations and remove it from CallSites.
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 06ae34c..dde2556 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -32,7 +32,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types",
void FindUsedTypes::IncorporateType(const Type *Ty) {
// If ty doesn't already exist in the used types map, add it now, otherwise
// return.
- if (!UsedTypes.insert(Ty).second) return; // Already contain Ty.
+ if (!UsedTypes.insert(Ty)) return; // Already contains Ty.
// Make sure to add any types this type references now.
//
@@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) {
//
void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
OS << "Types in use by this module:\n";
- for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+ for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
E = UsedTypes.end(); I != E; ++I) {
OS << " ";
WriteTypeSymbolic(OS, *I, M);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 2cda791..a0c42f0 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/STLExtras.h"
@@ -38,6 +39,15 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
"Induction Variable Users", false, true)
+// IVUsers behavior currently depends on this temporary indvars mode. The
+// option must be defined upstream from its uses.
+namespace llvm {
+ bool DisableIVRewrite = false;
+}
+cl::opt<bool, true> DisableIVRewriteOpt(
+ "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
+ cl::desc("Disable canonical induction variable rewriting"));
+
Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
@@ -79,7 +89,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
@@ -90,6 +100,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
+ // We expect Sign/Zero extension to be eliminated from the IR before analyzing
+ // any downstream uses.
+ if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
+ return false;
+
if (!Processed.insert(I))
return true; // Instruction already handled.
@@ -121,13 +136,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User)) {
+ !AddUsersIfInteresting(User, Phi)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
} else if (Processed.count(User) ||
- !AddUsersIfInteresting(User)) {
+ !AddUsersIfInteresting(User, Phi)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -135,9 +150,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce.
- IVUses.push_back(new IVStrideUse(this, User, I));
+ IVUses.push_back(new IVStrideUse(this, User, I, Phi));
IVStrideUse &NewUse = IVUses.back();
- // Transform the expression into a normalized form.
+ // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+ // The regular return value here is discarded; instead of recording
+ // it, we just recompute it when we need it.
ISE = TransformForPostIncUse(NormalizeAutodetect,
ISE, User, I,
NewUse.PostIncLoops,
@@ -148,8 +165,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true;
}
-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
- IVUses.push_back(new IVStrideUse(this, User, Operand));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
return IVUses.back();
}
@@ -177,7 +194,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- (void)AddUsersIfInteresting(I);
+ (void)AddUsersIfInteresting(I, cast<PHINode>(I));
return false;
}
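
The -disable-iv-rewrite option above uses the cl::location form of cl::opt, which parses the flag into externally visible storage so that other translation units can test it as a plain bool without including CommandLine.h. A minimal sketch of the same pattern with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// External storage: any file declaring `extern bool EnableFoo;` inside
// namespace llvm can read the flag directly. "EnableFoo" is hypothetical,
// for illustration only.
namespace llvm {
  bool EnableFoo = false;
}

static cl::opt<bool, true> EnableFooOpt(
    "enable-foo", cl::Hidden, cl::location(llvm::EnableFoo),
    cl::desc("Enable the hypothetical foo behavior"));
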
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a820ecf..efde598 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -66,21 +66,13 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
ImmutableCallSite CS(cast<Instruction>(II));
- // If this function contains a call to setjmp or _setjmp, never inline
- // it. This is a hack because we depend on the user marking their local
- // variables as volatile if they are live across a setjmp call, and they
- // probably won't do this in callers.
if (const Function *F = CS.getCalledFunction()) {
// If a function is both internal and has a single use, then it is
// extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
if (F->hasInternalLinkage() && F->hasOneUse())
++NumInlineCandidates;
-
- if (F->isDeclaration() &&
- (F->getName() == "setjmp" || F->getName() == "_setjmp"))
- callsSetJmp = true;
-
+
// If this call is to function itself, then the function is recursive.
// Inlining it into other functions is a bad idea, because this is
// basically just a form of loop peeling, and our metrics aren't useful
@@ -226,6 +218,13 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
void CodeMetrics::analyzeFunction(Function *F) {
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (F->callsFunctionThatReturnsTwice())
+ callsSetJmp = true;
+
// Look at the size of the callee.
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
analyzeBasicBlock(&*BB);
@@ -594,7 +593,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
// For small functions we prefer to recalculate the cost for better accuracy.
- if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) {
+ if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
resetCachedCostInfo(Caller);
return;
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d6d339..9d78f8b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -913,8 +913,6 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
}
}
- bool isSigned = Opcode == Instruction::SRem;
-
// X % undef -> undef
if (match(Op1, m_Undef()))
return Op1;
@@ -1378,6 +1376,26 @@ static const Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
+/// otherwise return null. Helper function for analyzing max/min idioms.
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI)
+ return 0;
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+ if (!Cmp)
+ return 0;
+ Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+ if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+ return Cmp;
+ if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+ LHS == CmpRHS && RHS == CmpLHS)
+ return Cmp;
+ return 0;
+}
+
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -1460,46 +1478,48 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
default:
assert(false && "Unknown ICmp predicate!");
case ICmpInst::ICMP_ULT:
- return ConstantInt::getFalse(LHS->getContext());
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
case ICmpInst::ICMP_UGE:
- return ConstantInt::getTrue(LHS->getContext());
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return ConstantInt::getAllOnesValue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
if (isKnownNonZero(LHS, TD))
- return ConstantInt::getFalse(LHS->getContext());
+ return Constant::getNullValue(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
if (isKnownNonZero(LHS, TD))
- return ConstantInt::getTrue(LHS->getContext());
+ return ConstantInt::getAllOnesValue(ITy);
break;
case ICmpInst::ICMP_SLT:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getTrue(LHS->getContext());
+ return ConstantInt::getAllOnesValue(ITy);
if (LHSKnownNonNegative)
- return ConstantInt::getFalse(LHS->getContext());
+ return Constant::getNullValue(ITy);
break;
case ICmpInst::ICMP_SLE:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getTrue(LHS->getContext());
+ return ConstantInt::getAllOnesValue(ITy);
if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
- return ConstantInt::getFalse(LHS->getContext());
+ return Constant::getNullValue(ITy);
break;
case ICmpInst::ICMP_SGE:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getFalse(LHS->getContext());
+ return Constant::getNullValue(ITy);
if (LHSKnownNonNegative)
- return ConstantInt::getTrue(LHS->getContext());
+ return ConstantInt::getAllOnesValue(ITy);
break;
case ICmpInst::ICMP_SGT:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getFalse(LHS->getContext());
+ return Constant::getNullValue(ITy);
if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
- return ConstantInt::getTrue(LHS->getContext());
+ return ConstantInt::getAllOnesValue(ITy);
break;
}
}
@@ -1791,7 +1811,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- return ConstantInt::getFalse(RHS->getContext());
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
@@ -1801,7 +1822,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- return ConstantInt::getTrue(RHS->getContext());
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
}
}
if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
@@ -1818,7 +1840,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- return ConstantInt::getTrue(RHS->getContext());
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
@@ -1828,7 +1851,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- return ConstantInt::getFalse(RHS->getContext());
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
}
}
@@ -1843,7 +1867,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// fall-through
case Instruction::SDiv:
case Instruction::AShr:
- if (!LBO->isExact() && !RBO->isExact())
+ if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), TD, DT, MaxRecurse-1))
@@ -1864,6 +1888,194 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // Simplify comparisons involving max/min.
+ Value *A, *B;
+ CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+ CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+ // Signed variants on "max(a,b)>=a -> true".
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_SLE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_SGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_SGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_SLT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Unsigned variants on "max(a,b)>=a -> true".
+ P = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_ULE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_UGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_UGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_ULT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Variants on "max(x,y) >= min(x,z)".
+ Value *C, *D;
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_SGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SLT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_SLE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_UGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_ULT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_ULE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_UGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
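
The max/min simplifications added in this hunk all reduce to a few order identities: smax(a,b) >= a always holds, smin(a,b) > a never holds, and a max and a min that share an operand are ordered through it. A small check of those identities on ordinary integers; this is illustrative only, since the pass matches IR patterns rather than calling std::max:

#include <algorithm>
#include <cassert>

int main() {
  const int Vals[] = { -7, -2, 0, 3, 5 };
  for (int a : Vals)
    for (int b : Vals) {
      assert(std::max(a, b) >= a);   // smax(a,b) sge a: always true
      assert(!(std::min(a, b) > a)); // smin(a,b) sgt a: always false
      // A shared operand orders max against min: max(a,b) >= a >= min(a,c).
      for (int c : Vals)
        assert(std::max(a, b) >= std::min(a, c));
    }
  return 0;
}
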
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index d5f0b5c..6e27597 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -589,16 +589,18 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (MI->isVolatile()) return false;
- if (MI->getAddressSpace() != 0) return false;
// FIXME: check whether it has a valuerange that excludes zero?
ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
if (!Len || Len->isZero()) return false;
- if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
- return true;
+ if (MI->getDestAddressSpace() == 0)
+ if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+ return true;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
- return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+ if (MTI->getSourceAddressSpace() == 0)
+ if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr)
+ return true;
}
return false;
}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index ab34fd6..c5c676b 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come from identical arithmetic instructions.
// Use isIdenticalToWhenDefined instead of isIdenticalTo because
// this function is only used when one address use dominates the
@@ -42,7 +42,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
if (const Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index ce7fab6..5f640c0 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -374,10 +374,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
// If we have a partial alias, then return this as a clobber for the
// client to handle.
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
+#endif
// Random may-alias loads don't depend on each other without a
// dependence.
@@ -497,7 +503,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & AliasAnalysis::Mod);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
- isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
QueryParent);
@@ -937,6 +943,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
// Keep track of the entries that we know are sorted. Previously cached
// entries will all be sorted. The entries we add we only sort on demand (we
// don't insert every element into its sorted position). We know that we
@@ -973,22 +982,29 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// the same Pointer.
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
- Worklist.push_back(*PI);
+ NewBlocks.push_back(*PI);
continue;
}
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
- if (InsertRes.first->second != Pointer.getAddr())
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
goto PredTranslationFailure;
+ }
}
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
@@ -1007,13 +1023,15 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NumSortedEntries = Cache->size();
}
Cache = 0;
-
+
+ PredList.clear();
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
-
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
- PHITransAddr PredPointer(Pointer);
+ PHITransAddr &PredPointer = PredList.back().second;
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
@@ -1027,6 +1045,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
// If the predecessor was visited with PredPtr, then we already did
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
@@ -1035,14 +1056,47 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < PredList.size(); i++)
+ Visited.erase(PredList[i].first);
+
goto PredTranslationFailure;
}
-
+ }
+
+ // Actually process results here; this needs to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // datastructures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0; i < PredList.size(); i++) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
// If PHI translation was unable to find an available pointer in this
// predecessor, then we have to assume that the pointer is clobbered in
// that predecessor. We can still do PRE of the load, which would insert
// a computation of the pointer in this predecessor.
- if (PredPtrVal == 0) {
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume a clobber, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
// Add the entry to the Result list.
NonLocalDepResult Entry(Pred,
MemDepResult::getClobber(Pred->getTerminator()),
@@ -1058,19 +1112,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NLPI.Pair = BBSkipFirstBlockPair();
continue;
}
-
- // FIXME: it is entirely possible that PHI translating will end up with
- // the same value. Consider PHI translating something like:
- // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
- // to recurse here, pedantically speaking.
-
- // If we have a problem phi translating, fall through to the code below
- // to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPointer,
- Loc.getWithNewPtr(PredPointer.getAddr()),
- isLoad, Pred,
- Result, Visited))
- goto PredTranslationFailure;
}
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
@@ -1087,6 +1128,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
continue;
PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our datastructures while processing the current block.
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
@@ -1117,8 +1161,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->setResult(MemDepResult::getClobber(BB->begin()));
- ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+ I->setResult(MemDepResult::getClobber(BB->getTerminator()));
+ ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
Pointer.getAddr()));
break;
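
The PredList restructuring above splits the predecessor walk into two phases: first translate and record every predecessor without recursing, then process the recorded list, so that the failure path can erase exactly the Visited entries this invocation added. The same shape in schematic form; this is plain C++ with the translation and recursion stubbed out, not the MemDep code itself:

#include <map>
#include <utility>
#include <vector>

// Schematic two-phase walk: Visited maps a block id to a translated address.
bool visitPreds(const std::vector<std::pair<int, int> > &Preds,
                std::map<int, int> &Visited) {
  std::vector<std::pair<int, int> > PredList;

  // Phase 1: record each predecessor, keeping Visited consistent.
  for (unsigned i = 0; i != Preds.size(); ++i) {
    PredList.push_back(Preds[i]);
    std::pair<std::map<int, int>::iterator, bool> Ins =
        Visited.insert(Preds[i]);
    if (!Ins.second) {
      PredList.pop_back(); // this entry was not newly inserted by us
      if (Ins.first->second == Preds[i].second)
        continue; // already analyzed with the same pointer
      // Conflicting pointer: undo only what this call added, then fail.
      for (unsigned j = 0; j != PredList.size(); ++j)
        Visited.erase(PredList[j].first);
      return false;
    }
  }

  // Phase 2: only now recurse/process; nothing below can leave the maps
  // half-updated when the failure path runs.
  for (unsigned i = 0; i != PredList.size(); ++i)
    (void)PredList[i]; // getNonLocalPointerDepFromBB(...) would run here
  return true;
}
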
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 3269dcc..80eda79 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -249,7 +249,7 @@ void RegionPass::assignPassManager(PMStack &PMS,
assert (!PMS.empty() && "Unable to create Region Pass Manager");
PMDataManager *PMD = PMS.top();
- // [1] Create new Call Graph Pass Manager
+ // [1] Create new Region Pass Manager
RGPM = new RGPassManager(PMD->getDepth() + 1);
RGPM->populateInheritedAnalysis(PMS);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index bab4619..025718e 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1035,6 +1035,93 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return 0;
+}
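
For intuition, a standalone worked example (assumed values, not LLVM code) of
the positive-step limit on an 8-bit recurrence; the subtraction wraps modulo
2^8 exactly as APInt arithmetic does:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assume an 8-bit recurrence whose step is known positive with max 3.
      uint8_t StepMax = 3;
      // SignedMin(8 bits) - StepMax, computed modulo 2^8 like APInt.
      uint8_t Limit = (uint8_t)(0x80 - StepMax); // 0x7D, i.e. +125 as int8_t
      // Pred is SLT: any value V with V < 125 satisfies V + 3 <= 127, so the
      // increment cannot signed-overflow.
      for (int V = -128; V < (int8_t)Limit; ++V)
        assert(V + 3 <= 127);
      printf("limit = %d\n", (int8_t)Limit);
      return 0;
    }
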
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we
+// can prove NSW for AR, then we can just as easily prove NSW for its
+// preincrement or postincrement sibling. This allows normalizing a
+// sign-extended AddRec as such: {sext(Step + Start),+,Step} =>
+// {(Step + sext(Start)),+,Step}. As a result, the expression
+// "Step + sext(PreIncAR)" is congruent with "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+ return 0;
+
+ // This is a postinc AR. Check for overflow on the preinc recurrence using
+ // the same three conditions that getSignExtendExpr checks.
+
+ // 1. NSW flags on the step increment.
+ const SCEV *PreStart = SA->getOperand(1);
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+ SE->getSignExtendExpr(Step, WideTy));
+ if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+ // Cache knowledge of PreAR NSW.
+ if (PreAR)
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+ // FIXME: this optimization needs a unit test
+ DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+ if (!PreStart)
+ return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+ return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+ SE->getSignExtendExpr(PreStart, Ty));
+}
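
To see why the normalization is sound, here is a brute-force check of the
congruence on 8-bit values (the constants are illustrative; the no-signed-wrap
premise on the preinc recurrence is asserted explicitly):

    #include <cassert>
    #include <cstdint>

    int main() {
      // {X,+,Step} is the preinc AR; {X + Step,+,Step} is its postinc sibling.
      const int Step = 5, X = -20, Iters = 20;
      for (int k = 0; k < Iters; ++k) {
        int Pre = X + k * Step;                   // preinc value at iteration k
        assert(Pre >= -128 && Pre + Step <= 127); // the no-signed-wrap premise
        int16_t SextPost = (int16_t)(int8_t)(Pre + Step);     // sext(PostIncAR)
        int16_t StepPlusSextPre = (int16_t)Step + (int16_t)(int8_t)Pre;
        assert(SextPost == StepPlusSextPre);      // Step + sext(PreIncAR)
      }
      return 0;
    }
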
+
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, SCEV::FlagNSW);
@@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
@@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
@@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// the addrec is safe. Also, if the entry is guarded by a comparison
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
- if (isKnownPositive(Step)) {
- const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
- getSignedRange(Step).getSignedMax());
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
- AR->getPostIncExpr(*this), N))) {
- // Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- // Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
- }
- } else if (isKnownNegative(Step)) {
- const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
- getSignedRange(Step).getSignedMin());
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
- AR->getPostIncExpr(*this), N))) {
- // Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- // Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
- }
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
}
}
}
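
A concrete loop shape (ordinary C++, not tied to this file) where the entry
and backedge guards supply exactly this kind of overflow limit: since
n <= INT_MAX and the backedge is only taken while i < n, the increment i + 1
never wraps, so {0,+,1} is NSW and its sign extension can be hoisted.

    long sum(const int *A, int n) {
      long S = 0;
      for (int i = 0; i < n; ++i) // guard proves i + 1 cannot signed-overflow
        S += A[i];
      return S;
    }

    int main() {
      int A[4] = {1, 2, 3, 4};
      return sum(A, 4) == 10 ? 0 : 1;
    }
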
@@ -3783,7 +3854,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
- std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
if (!Pair.second)
return Pair.first->second;
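
A generic sketch of the insert-a-placeholder idiom the comment above
describes (std::map stands in for LLVM's DenseMap; names are made up): insert
a sentinel before computing, so a re-entrant call sees the sentinel and
returns instead of recursing forever.

    #include <map>
    #include <string>

    static std::map<int, std::string> Cache;

    static const std::string &compute(int Key) {
      std::pair<std::map<int, std::string>::iterator, bool> Pair =
          Cache.insert(std::make_pair(Key, std::string("<in progress>")));
      if (!Pair.second)
        return Pair.first->second; // cached, or currently being computed
      // ... a computation that may indirectly re-enter compute(Key) ...
      Pair.first->second = "done";
      return Pair.first->second;
    }

    int main() { return compute(7) == "done" ? 0 : 1; }
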
@@ -4433,7 +4504,7 @@ Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- std::map<PHINode*, Constant*>::const_iterator I =
+ DenseMap<PHINode*, Constant*>::const_iterator I =
ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 8f18dd2..dab5aeb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -131,8 +131,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
return;
}
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // Get alignment information off byval arguments if specified in the IR.
+ if (A->hasByValAttr())
+ if (unsigned Align = A->getParamAlignment())
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ return;
+ }
- KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything.
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
if (Depth == MaxDepth || Mask == 0)
return; // Limit search depth.
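
A sketch of the fact the byval hunk above records: an argument with alignment
Align has log2(Align) known-zero low bits (the alignment value below is an
assumed example):

    #include <cassert>
    #include <stdint.h>

    int main() {
      uint64_t Align = 16;
      unsigned TrailingZeros = 0;
      while (((Align >> TrailingZeros) & 1) == 0)
        ++TrailingZeros;                      // CountTrailingZeros_32(16) == 4
      uint64_t KnownZeroMask = (1ULL << TrailingZeros) - 1; // low 4 bits
      uint64_t P = 0x1230;                    // any 16-byte-aligned address
      assert((P & KnownZeroMask) == 0);
      return 0;
    }
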
@@ -670,6 +680,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ break;
}
}
break;
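
The new intrinsic cases encode a hardware fact: the 64-bit crc32 forms produce
a 32-bit CRC zero-extended into the 64-bit destination. A quick check using
the SSE4.2 intrinsic (requires -msse4.2 and matching hardware):

    #include <cassert>
    #include <nmmintrin.h> // SSE4.2

    int main() {
      unsigned long long Crc = _mm_crc32_u64(0, 0x0123456789abcdefULL);
      // The high 32 bits are always zero; this is the KnownZero fact above.
      assert((Crc >> 32) == 0);
      return 0;
    }
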