path: root/contrib/llvm/lib/Analysis
author     dim <dim@FreeBSD.org>  2014-03-21 17:53:59 +0000
committer  dim <dim@FreeBSD.org>  2014-03-21 17:53:59 +0000
commit     9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch)
tree       c978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Analysis
parent     03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff)
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports all of
the features in the current working draft of the upcoming C++ standard,
provisionally named C++1y. The code generator's performance is greatly
increased, and the loop auto-vectorizer is now enabled at -Os and -O2 in
addition to -O3. The PowerPC backend has made several major improvements to
code generation quality and compile time, and the X86, SPARC, ARM32,
AArch64 and SystemZ backends have all seen major feature work.

Release notes for llvm and clang can be found here:
<http://llvm.org/releases/3.4/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html>

MFC 262121 (by emaste):

  Update lldb for clang/llvm 3.4 import

  This commit largely restores the lldb source to the upstream r196259
  snapshot with the addition of threaded inferior support and a few bug
  fixes.

  Specific upstream lldb revisions restored include:

    SVN     git
    181387  779e6ac
    181703  7bef4e2
    182099  b31044e
    182650  f2dcf35
    182683  0d91b80
    183862  15c1774
    183929  99447a6
    184177  0b2934b
    184948  4dc3761
    184954  007e7bc
    186990  eebd175

  Sponsored by: DARPA, AFRL

MFC 262186 (by emaste):

  Fix mismerge in r262121.

  A break statement was lost in the merge. The error had no functional
  impact, but restore it to reduce the diff against upstream.

MFC 262303:

  Pull in r197521 from upstream clang trunk (by rdivacky):

    Use the integrated assembler by default on FreeBSD/ppc and ppc64.

  Requested by: jhibbits

MFC 262611:

  Pull in r196874 from upstream llvm trunk:

    Fix a crash that occurs when PWD is invalid.

    MCJIT needs to be able to run in hostile environments, even when PWD
    is invalid. There's no need to crash MCJIT in this case.

    The obvious fix is to simply leave MCContext's CompilationDir empty
    when PWD can't be determined. This way, MCJIT clients, and other
    clients that link with LLVM don't need a valid working directory.

    If we do want to guarantee a valid CompilationDir, that should be
    done only for clients of getCompilationDir(). This is as simple as
    checking for an empty string.

    The only current use of getCompilationDir is EmitGenDwarfInfo, which
    won't conceivably run with an invalid working dir. However, in the
    purely hypothetical and untestable case that this happens, the
    AT_comp_dir will be omitted from the compilation_unit DIE.

  This should help fix assertions occurring with ports-mgmt/tinderbox,
  when it is using jails, and sometimes invalidates clang's current
  working directory.

  Reported by: decke

MFC 262809:

  Pull in r203007 from upstream clang trunk:

    Don't produce an alias between destructors with different calling
    conventions. Fixes pr19007.

  (Please note that this is an LLVM PR identifier, not a FreeBSD one.)

  This should fix Firefox and/or libxul crashes (due to problems with
  regparm/stdcall calling conventions) on i386.

  Reported by: multiple users on freebsd-current
  PR: bin/187103

MFC 263048:

  Repair recognition of "CC" as an alias for the C++ compiler, since it
  was silently broken by upstream for a Windows-specific use-case.
  Apparently some versions of CMake still rely on this archaic feature...

  Reported by: rakuco

MFC 263049:

  Garbage collect the old way of adding the libstdc++ include directories
  in clang's InitHeaderSearch.cpp. This has been superseded by David
  Chisnall's commit in r255321.

  Moreover, if libc++ is used, the libstdc++ include directories should
  not be in the search path at all. These directories are now only used
  if you pass -stdlib=libstdc++.
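The empty-string check described under MFC 262611 is made by the clients of
getCompilationDir(), not by MCContext itself. A minimal sketch of what such a
caller-side check could look like; the function below and its name are
illustrative, only MCContext::getCompilationDir() comes from the source:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCContext.h"

    // Hypothetical consumer of MCContext::getCompilationDir(). After the
    // fix, CompilationDir is simply left empty when PWD is invalid, so a
    // client that needs a valid directory checks for the empty string and
    // degrades gracefully instead of crashing.
    static void emitCompDirIfKnown(llvm::MCContext &Ctx) {
      llvm::StringRef CompDir = Ctx.getCompilationDir();
      if (CompDir.empty())
        return; // PWD was invalid; omit AT_comp_dir from the CU DIE.
      // ... emit DW_AT_comp_dir using CompDir ...
    }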
Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp                    47
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp                   2
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp                         11
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp              122
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp              118
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp           115
-rw-r--r--  contrib/llvm/lib/Analysis/CFG.cpp                             245
-rw-r--r--  contrib/llvm/lib/Analysis/CaptureTracking.cpp                   8
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp                 228
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp                       289
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp                 133
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp               85
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/CallGraph.cpp                   230
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp              2
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp                 2
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/IPA.cpp                           3
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/InlineCost.cpp                  159
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp              46
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp                     8
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp                             42
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp                         97
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp                          4
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp                  114
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp         15
-rw-r--r--  contrib/llvm/lib/Analysis/PathNumbering.cpp                   521
-rw-r--r--  contrib/llvm/lib/Analysis/PathProfileInfo.cpp                 433
-rw-r--r--  contrib/llvm/lib/Analysis/PathProfileVerifier.cpp             206
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileDataLoader.cpp               155
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp           188
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp            426
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileInfo.cpp                    1079
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp               155
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp           267
-rw-r--r--  contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp             383
-rw-r--r--  contrib/llvm/lib/Analysis/RegionInfo.cpp                        6
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp                1112
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp         145
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp     18
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp              76
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp          116
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp                    88
41 files changed, 2793 insertions, 4706 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 210b80a..b8b6d37 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
@@ -361,24 +362,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
}
namespace {
- // Conservatively return true. Return false, if there is a single path
- // starting from "From" and the path does not reach "To".
- static bool hasPath(const BasicBlock *From, const BasicBlock *To) {
- const unsigned MaxCheck = 5;
- const BasicBlock *Current = From;
- for (unsigned I = 0; I < MaxCheck; I++) {
- unsigned NumSuccs = Current->getTerminator()->getNumSuccessors();
- if (NumSuccs > 1)
- return true;
- if (NumSuccs == 0)
- return false;
- Current = Current->getTerminator()->getSuccessor(0);
- if (Current == To)
- return true;
- }
- return true;
- }
-
/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only support the case where the Value is defined in the same basic block
@@ -400,7 +383,7 @@ namespace {
// there is no need to explore the use if BeforeHere dominates use.
// Check whether there is a path from I to BeforeHere.
if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !hasPath(BB, BeforeHere->getParent()))
+ !isPotentiallyReachable(I, BeforeHere, DT))
return false;
return true;
}
@@ -412,7 +395,7 @@ namespace {
if (BeforeHere != I && !DT->isReachableFromEntry(BB))
return false;
if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !hasPath(BB, BeforeHere->getParent()))
+ !isPotentiallyReachable(I, BeforeHere, DT))
return false;
Captured = true;
return true;
@@ -450,6 +433,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
return AliasAnalysis::ModRef;
unsigned ArgNo = 0;
+ AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
@@ -463,12 +447,18 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(AliasAnalysis::Location(*CI),
- AliasAnalysis::Location(Object))) {
- return AliasAnalysis::ModRef;
+ if (isNoAlias(AliasAnalysis::Location(*CI),
+ AliasAnalysis::Location(Object)))
+ continue;
+ if (CS.doesNotAccessMemory(ArgNo))
+ continue;
+ if (CS.onlyReadsMemory(ArgNo)) {
+ R = AliasAnalysis::Ref;
+ continue;
}
+ return AliasAnalysis::ModRef;
}
- return AliasAnalysis::NoModRef;
+ return R;
}
// AliasAnalysis destructor: DO NOT move this to the header file for
@@ -537,6 +527,15 @@ bool llvm::isNoAliasCall(const Value *V) {
return false;
}
+/// isNoAliasArgument - Return true if this is an argument with the noalias
+/// attribute.
+bool llvm::isNoAliasArgument(const Value *V)
+{
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr();
+ return false;
+}
+
/// isIdentifiedObject - Return true if this pointer refers to a distinct and
/// identifiable object. This returns true for:
/// Global Variables and Functions (but not Global Aliases)
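The callCapturesBefore() change above refines the answer for calls whose
only may-aliasing pointer arguments are marked readonly: they now report Ref
instead of the blanket ModRef. A hedged sketch of a query against the
updated API; the wrapper and its names are illustrative, while the
callCapturesBefore signature matches the one this file defines:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/Dominators.h"

    // NoModRef: every pointer argument is NoAlias with Loc.
    // Ref:      some argument may alias Loc, but all such args are readonly.
    // ModRef:   some argument may alias Loc and may be written.
    llvm::AliasAnalysis::ModRefResult
    classifyCallEffect(llvm::AliasAnalysis &AA, const llvm::Instruction *Call,
                       const llvm::AliasAnalysis::Location &Loc,
                       llvm::DominatorTree *DT) {
      return AA.callCapturesBefore(Call, Loc, DT);
    }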
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 5910526..2289c12 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
AliasSet::AccessType ATy = AliasSet::Refs;
- if (!LI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
@@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) {
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
AliasSet::AccessType ATy = AliasSet::Mods;
- if (!SI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 349c417..98f2a55 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializeDependenceAnalysisPass(Registry);
+ initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
- initializeProfileEstimatorPassPass(Registry);
- initializeNoProfileInfoPass(Registry);
- initializeNoPathProfileInfoPass(Registry);
- initializeProfileInfoAnalysisGroup(Registry);
- initializePathProfileInfoAnalysisGroup(Registry);
- initializeLoaderPassPass(Registry);
- initializePathProfileLoaderPassPass(Registry);
- initializeProfileVerifierPassPass(Registry);
- initializePathProfileVerifierPass(Registry);
- initializeProfileMetadataLoaderPassPass(Registry);
initializeRegionInfoPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
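For context on how the registration above takes effect: initializeAnalysis()
is run once against the global pass registry, which is what makes the newly
added delinearization analysis requestable by name while the deleted
profile-info entry points disappear. A minimal sketch, assuming only the
standard PassRegistry accessor; the wrapper function is hypothetical:

    #include "llvm/InitializePasses.h"
    #include "llvm/PassRegistry.h"

    // One-time registration against the global registry; afterwards the
    // new Delinearization pass is known to tools such as opt, and the
    // removed initializeProfile*/initializePathProfile* calls no longer
    // exist to be made.
    void registerAnalysisPasses() {
      llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry();
      llvm::initializeAnalysis(Registry);
    }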
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index f8509dd..b2c2011 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// question (in this case rewind to p), or
// - just give up. It is up to the caller to make sure the pointer is pointing
// to the base address of the object.
- //
+ //
// We go for 2nd option for simplicity.
if (!isIdentifiedObject(V))
return false;
@@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
-
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
@@ -142,6 +142,17 @@ static bool isObjectSize(const Value *V, uint64_t Size,
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
}
+/// isIdentifiedFunctionLocal - Return true if V is unambiguously identified
+/// at the function level. Different IdentifiedFunctionLocals can't alias.
+/// Further, an IdentifiedFunctionLocal can not alias with any function
+/// arguments other than itself, which is not necessarily true for
+/// IdentifiedObjects.
+static bool isIdentifiedFunctionLocal(const Value *V)
+{
+ return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
+}
+
+
//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
@@ -152,7 +163,7 @@ namespace {
EK_SignExt,
EK_ZeroExt
};
-
+
struct VariableGEPIndex {
const Value *V;
ExtensionKind Extension;
@@ -189,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = 0;
return V;
}
-
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -220,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
}
}
}
-
+
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
@@ -237,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
TD, Depth+1);
Scale = Scale.zext(OldWidth);
Offset = Offset.zext(OldWidth);
-
+
return Result;
}
-
+
Scale = 1;
Offset = 0;
return V;
@@ -265,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
const DataLayout *TD) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = 6;
-
+
BaseOffs = 0;
do {
// See if this is a bitcast or GEP.
@@ -280,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
return V;
}
-
+
if (Op->getOpcode() == Instruction::BitCast) {
V = Op->getOperand(0);
continue;
@@ -297,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = Simplified;
continue;
}
-
+
return V;
}
-
+
// Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
+ if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
-
+
// If we are lacking DataLayout information, we can't compute the offsets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
@@ -315,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -326,38 +337,37 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
-
+
BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
-
+
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
continue;
}
-
+
uint64_t Scale = TD->getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
-
+
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ unsigned Width = Index->getType()->getIntegerBitWidth();
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
-
+
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
*TD, 0);
-
+
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
BaseOffs += IndexOffset.getSExtValue()*Scale;
Scale *= IndexScale.getSExtValue();
-
-
+
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
@@ -370,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
break;
}
}
-
+
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
-
+
if (Scale) {
VariableGEPIndex Entry = {Index, Extension,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
-
+
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
} while (--MaxLookup);
-
+
// If the chain of expressions is too deep, just return early.
return V;
}
@@ -396,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
/// GetIndexDifference - Dest and Src are the variable indices from two
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
+/// difference between the two pointers.
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty()) return;
@@ -405,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const Value *V = Src[i].V;
ExtensionKind Extension = Src[i].Extension;
int64_t Scale = Src[i].Scale;
-
+
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
+
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest[j].Scale != Scale)
@@ -420,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
Scale = 0;
break;
}
-
+
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
VariableGEPIndex Entry = { V, Extension, -Scale };
@@ -515,7 +525,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
// AliasCache - Track alias queries to guard against recursion.
typedef std::pair<Location, Location> LocPair;
@@ -685,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
-
+
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
@@ -695,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
-
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
@@ -711,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!(*CI)->getType()->isPointerTy() ||
(!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
-
+
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
@@ -721,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
break;
}
}
-
+
if (!PassedAsArg)
return NoModRef;
}
@@ -810,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
}
// We can bound the aliasing properties of memset_pattern16 just as we can
- // for memcpy/memset. This is particularly important because the
+ // for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
else if (TLI.has(LibFunc::memset_pattern16) &&
@@ -846,8 +856,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}
-static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1,
- SmallVector<VariableGEPIndex, 4> &Indices2) {
+static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
+ SmallVectorImpl<VariableGEPIndex> &Indices2) {
unsigned Size1 = Indices1.size();
unsigned Size2 = Indices2.size();
@@ -914,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
+
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
if (BaseAlias != MustAlias) return BaseAlias;
-
+
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -937,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
-
+
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
-
+
} else {
// Check to see if these two pointers are related by the getelementptr
// instruction. If one pointer is a GEP with a non-zero index of the other
@@ -964,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
@@ -973,7 +983,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
}
}
-
+
// In the two GEP Case, if there is no difference in the offsets of the
// computed pointers, the resultant pointers are a must alias. This
// happens when we have two lexically identical GEP's (for example).
@@ -1205,17 +1215,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
(isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
return NoAlias;
- // Arguments can't alias with local allocations or noalias calls
- // in the same function.
- if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
- (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+ // Function arguments can't alias with things that are known to be
+ // unambiguously identified at the function level.
+ if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) ||
+ (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
return NoAlias;
// Most objects can't alias null.
if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
(isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
-
+
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
@@ -1237,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
return NoAlias;
-
+
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
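In source terms, the isIdentifiedFunctionLocal() change lets BasicAA
disambiguate a plain pointer argument from a noalias (restrict) argument,
where the old check required an alloca or a noalias call on one side. A
small illustrative C++ sketch; the function and variable names are
hypothetical:

    // 'q' is a noalias argument, so it may only alias itself; under the
    // new isa<Argument>(O1) && isIdentifiedFunctionLocal(O2) rule, the
    // plain argument 'p' is therefore NoAlias with it.
    void storeBoth(int *p, int *__restrict q) {
      *q = 1;
      *p = 2; // cannot clobber *q
    }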
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 100e5c8..62f3ab1 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,14 +17,97 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+#ifndef NDEBUG
+enum GVDAGType {
+ GVDT_None,
+ GVDT_Fraction,
+ GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how block "
+ "frequencies propagation through the CFG."),
+ cl::values(
+ clEnumValN(GVDT_None, "none",
+ "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+ typedef const BasicBlock NodeType;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef Function::const_iterator nodes_iterator;
+
+ static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeType *N) {
+ return succ_end(N);
+ }
+ static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const BlockFrequencyInfo *G) {
+ return G->getFunction()->getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node,
+ const BlockFrequencyInfo *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << Node->getName().str() << ":";
+ switch (ViewBlockFreqPropagationDAG) {
+ case GVDT_Fraction:
+ Graph->getBlockFreq(Node).print(OS);
+ break;
+ case GVDT_Integer:
+ OS << Graph->getBlockFreq(Node).getFrequency();
+ break;
+ case GVDT_None:
+ llvm_unreachable("If we are not supposed to render a graph we should "
+ "never reach this point.");
+ }
+
+ return Result;
+ }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
char BlockFrequencyInfo::ID = 0;
@@ -46,6 +129,10 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
BFI->doFunction(&F, &BPI);
+#ifndef NDEBUG
+ if (ViewBlockFreqPropagationDAG != GVDT_None)
+ view();
+#endif
return false;
}
@@ -53,11 +140,22 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
if (BFI) BFI->print(O);
}
-/// getblockFreq - Return block frequency. Return 0 if we don't have the
-/// information. Please note that initial frequency is equal to 1024. It means
-/// that we should not rely on the value itself, but only on the comparison to
-/// the other block frequencies. We do this to avoid using of floating points.
-///
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+ errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+ "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+ return BFI->Fn;
+}
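The new view() hook is only functional in debug builds, as the NDEBUG guard
above shows. A hedged sketch of invoking it from a pass that depends on
BlockFrequencyInfo; the pass itself is hypothetical, while view() and the
getAnalysis<> accessor are the standard pieces seen in this diff:

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Pass.h"

    namespace {
    // Hypothetical debugging pass that renders the block-frequency DAG.
    // In a release build, view() just prints the "only available in debug
    // builds" diagnostic shown above.
    struct FreqViewer : public llvm::FunctionPass {
      static char ID;
      FreqViewer() : llvm::FunctionPass(ID) {}
      virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
        AU.addRequired<llvm::BlockFrequencyInfo>();
        AU.setPreservesAll();
      }
      virtual bool runOnFunction(llvm::Function &F) {
        getAnalysis<llvm::BlockFrequencyInfo>().view();
        return false;
      }
    };
    char FreqViewer::ID = 0;
    }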
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 6c58856..86560ca 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
/// easily subsume it.
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+/// \brief Weight for a branch taken going into a cold block.
+///
+/// This is the weight for a branch taken toward a block marked
+/// cold. A block is marked cold if it's postdominated by a
+/// block containing a call to a cold function. Cold functions
+/// are those marked with attribute 'cold'.
+static const uint32_t CC_TAKEN_WEIGHT = 4;
+
+/// \brief Weight for a branch not-taken into a cold block.
+///
+/// This is the weight for a branch not taken toward a block marked
+/// cold.
+static const uint32_t CC_NONTAKEN_WEIGHT = 64;
+
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
@@ -137,8 +151,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
uint32_t UnreachableWeight =
std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
- for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(),
- E = UnreachableEdges.end();
+ for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(),
+ E = UnreachableEdges.end();
I != E; ++I)
setEdgeWeight(BB, *I, UnreachableWeight);
@@ -147,8 +161,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
uint32_t ReachableWeight =
std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
NORMAL_WEIGHT);
- for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(),
- E = ReachableEdges.end();
+ for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(),
+ E = ReachableEdges.end();
I != E; ++I)
setEdgeWeight(BB, *I, ReachableWeight);
@@ -193,6 +207,67 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
return true;
}
+/// \brief Calculate edge weights for edges leading to cold blocks.
+///
+/// A cold block is one post-dominated by a block with a call to a
+/// cold function. Those edges are unlikely to be taken, so we give
+/// them relatively low weight.
+///
+/// Return true if we could compute the weights for cold edges.
+/// Return false, otherwise.
+bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0)
+ return false;
+
+ // Determine which successors are post-dominated by a cold block.
+ SmallVector<unsigned, 4> ColdEdges;
+ SmallVector<unsigned, 4> NormalEdges;
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ if (PostDominatedByColdCall.count(*I))
+ ColdEdges.push_back(I.getSuccessorIndex());
+ else
+ NormalEdges.push_back(I.getSuccessorIndex());
+
+ // If all successors are in the set of blocks post-dominated by cold calls,
+ // this block is in the set post-dominated by cold calls.
+ if (ColdEdges.size() == TI->getNumSuccessors())
+ PostDominatedByColdCall.insert(BB);
+ else {
+ // Otherwise, if the block itself contains a cold function, add it to the
+ // set of blocks postdominated by a cold call.
+ assert(!PostDominatedByColdCall.count(BB));
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ PostDominatedByColdCall.insert(BB);
+ break;
+ }
+ }
+
+ // Skip probabilities if this block has a single successor.
+ if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
+ return false;
+
+ uint32_t ColdWeight =
+ std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
+ for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(),
+ E = ColdEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, ColdWeight);
+
+ if (NormalEdges.empty())
+ return true;
+ uint32_t NormalWeight = std::max(
+ CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
+ for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(),
+ E = NormalEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, NormalWeight);
+
+ return true;
+}
+
// Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison
// between two pointers, or between a pointer and NULL, will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
@@ -251,7 +326,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
if (backWeight < NORMAL_WEIGHT)
backWeight = NORMAL_WEIGHT;
- for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(),
+ for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(),
EE = BackEdges.end(); EI != EE; ++EI) {
setEdgeWeight(BB, *EI, backWeight);
}
@@ -262,7 +337,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
if (inWeight < NORMAL_WEIGHT)
inWeight = NORMAL_WEIGHT;
- for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(),
+ for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(),
EE = InEdges.end(); EI != EE; ++EI) {
setEdgeWeight(BB, *EI, inWeight);
}
@@ -273,7 +348,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
if (exitWeight < MIN_WEIGHT)
exitWeight = MIN_WEIGHT;
- for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(),
+ for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(),
EE = ExitingEdges.end(); EI != EE; ++EI) {
setEdgeWeight(BB, *EI, exitWeight);
}
@@ -323,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
+ } else if (CV->isAllOnesValue()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == -1 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != -1 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
} else {
return false;
}
@@ -397,6 +486,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
LastF = &F; // Store the last function we ran on for printing.
LI = &getAnalysis<LoopInfo>();
assert(PostDominatedByUnreachable.empty());
+ assert(PostDominatedByColdCall.empty());
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
@@ -408,6 +498,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
continue;
if (calcMetadataWeights(*I))
continue;
+ if (calcColdCallHeuristics(*I))
+ continue;
if (calcLoopBranchHeuristics(*I))
continue;
if (calcPointerHeuristics(*I))
@@ -420,6 +512,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
}
PostDominatedByUnreachable.clear();
+ PostDominatedByColdCall.clear();
return false;
}
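At the source level, the new calcColdCallHeuristics() fires on calls to
functions carrying the 'cold' attribute. An illustrative C++ snippet with
hypothetical names, showing the pattern that now receives the
CC_TAKEN_WEIGHT/CC_NONTAKEN_WEIGHT (4 : 64) split:

    // die() carries Attribute::Cold, so any block post-dominated by a call
    // to it is marked cold and the branch toward it is predicted unlikely.
    __attribute__((cold)) void die(const char *msg);

    void check(int err) {
      if (err)       // edge into the cold block: CC_TAKEN_WEIGHT (4)
        die("bad");  // fall-through edge: CC_NONTAKEN_WEIGHT (64)
    }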
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
new file mode 100644
index 0000000..c3f32d3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -0,0 +1,245 @@
+//===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs analyses on basic blocks, and instructions
+// contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFG.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
+}
+
+/// isCriticalEdge - Return true if the specified edge is a critical edge.
+/// Critical edges are edges from a block with multiple successors to a block
+/// with multiple predecessors.
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
+ return true;
+ // Note: leave this as is until no one ever compiles with either gcc 4.0.1
+ // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
+ E = pred_end(P);
+ ++I;
+ }
+ return false;
+}
+
+// LoopInfo contains a mapping from basic block to the innermost loop. Find
+// the outermost loop in the loop nest that contains BB.
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
+ const Loop *L = LI->getLoopFor(BB);
+ if (L) {
+ while (const Loop *Parent = L->getParentLoop())
+ L = Parent;
+ }
+ return L;
+}
+
+// True if there is a loop which contains both BB1 and BB2.
+static bool loopContainsBoth(const LoopInfo *LI,
+ const BasicBlock *BB1, const BasicBlock *BB2) {
+ const Loop *L1 = getOutermostLoop(LI, BB1);
+ const Loop *L2 = getOutermostLoop(LI, BB2);
+ return L1 != NULL && L1 == L2;
+}
+
+static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
+ BasicBlock *StopBB,
+ const DominatorTree *DT,
+ const LoopInfo *LI) {
+ // When the stop block is unreachable, it's dominated from everywhere,
+ // regardless of whether there's a path between the two blocks.
+ if (DT && !DT->isReachableFromEntry(StopBB))
+ DT = 0;
+
+ // Limit the number of blocks we visit. The goal is to avoid run-away compile
+ // times on large CFGs without hampering sensible code. Arbitrarily chosen.
+ unsigned Limit = 32;
+ SmallSet<const BasicBlock*, 64> Visited;
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (!Visited.insert(BB))
+ continue;
+ if (BB == StopBB)
+ return true;
+ if (DT && DT->dominates(BB, StopBB))
+ return true;
+ if (LI && loopContainsBoth(LI, BB, StopBB))
+ return true;
+
+ if (!--Limit) {
+ // We haven't been able to prove it one way or the other. Conservatively
+ // answer true -- that there is potentially a path.
+ return true;
+ }
+
+ if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) {
+ // All blocks in a single loop are reachable from all other blocks. From
+ // any of these blocks, we can skip directly to the exits of the loop,
+ // ignoring any other blocks inside the loop body.
+ Outer->getExitBlocks(Worklist);
+ } else {
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ Worklist.push_back(*I);
+ }
+ } while (!Worklist.empty());
+
+ // We have exhausted all possible paths and are certain that 'To' can not be
+ // reached from 'From'.
+ return false;
+}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent() == B->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(const_cast<BasicBlock*>(A));
+
+ return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
+ DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+
+ if (A->getParent() == B->getParent()) {
+ // The same block case is special because it's the only time we're looking
+ // within a single block to see which instruction comes first. Once we
+ // start looking at multiple blocks, the first instruction of the block is
+ // reachable, so we only need to determine reachability between whole
+ // blocks.
+ BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+ // If the block is in a loop then we can reach any instruction in the block
+ // from any other instruction in the block by going around a backedge.
+ if (LI && LI->getLoopFor(BB) != 0)
+ return true;
+
+ // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+ for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ if (&*I == B)
+ return true;
+ }
+
+ // Can't be in a loop if it's the entry block -- the entry block may not
+ // have predecessors.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return false;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ Worklist.push_back(*I);
+
+ if (Worklist.empty()) {
+ // We've proven that there's no path!
+ return false;
+ }
+ } else {
+ Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+ }
+
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return false;
+
+ return isPotentiallyReachableInner(Worklist,
+ const_cast<BasicBlock*>(B->getParent()),
+ DT, LI);
+}
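The exported interface of this new file is the pair of
isPotentiallyReachable() overloads at the bottom, which AliasAnalysis.cpp
above now calls in place of its private hasPath(). A minimal usage sketch;
the wrapper name is hypothetical, and passing null DT/LI is allowed, merely
making the answer more conservative:

    #include "llvm/Analysis/CFG.h"
    #include "llvm/Analysis/Dominators.h"

    // 'true' may mean "don't know"; 'false' is a proof that no path exists.
    bool mayReach(const llvm::Instruction *From, const llvm::Instruction *To,
                  const llvm::DominatorTree *DT) {
      return llvm::isPotentiallyReachable(From, To, DT, /*LI=*/0);
    }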
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 2118917..79fab1b 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -164,10 +164,10 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// Don't count comparisons of a no-alias return value against null as
// captures. This allows us to ignore comparisons of malloc results
// with null, for example.
- if (isNoAliasCall(V->stripPointerCasts()))
- if (ConstantPointerNull *CPN =
- dyn_cast<ConstantPointerNull>(I->getOperand(1)))
- if (CPN->getType()->getAddressSpace() == 0)
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ if (isNoAliasCall(V->stripPointerCasts()))
break;
// Otherwise, be conservative. There are crazy ways to capture pointers
// using comparisons.
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index bc0dffc..3d32232 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- Offset.clearAllBits();
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
return true;
}
@@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- // If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
- return false;
+ GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
+ if (!GEP)
+ return false;
- // Otherwise, add any offset that our operands provide.
- return GEP->accumulateConstantOffset(TD, Offset);
- }
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ APInt TmpOffset(BitWidth, 0);
- return false;
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ return false;
+
+ Offset = TmpOffset;
+ return true;
}
/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
@@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// If we read all of the bytes we needed from this element we're done.
uint64_t NextEltOffset = SL->getElementOffset(Index);
- if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
return true;
// Move to the next element of the struct.
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
- BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
+ BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
}
@@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ Type *EltTy = C->getType()->getSequentialElementType();
uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
@@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
NumElts = AT->getNumElements();
else
- NumElts = cast<VectorType>(C->getType())->getNumElements();
+ NumElts = C->getType()->getVectorNumElements();
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
@@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, TD);
+ }
}
// Otherwise, unknown initializer type.
@@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const DataLayout &TD) {
- Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ PointerType *PTy = cast<PointerType>(C->getType());
+ Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
+ unsigned AS = PTy->getAddressSpace();
+
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
// an actual new load.
Type *MapTy;
if (LoadTy->isHalfTy())
- MapTy = Type::getInt16PtrTy(C->getContext());
+ MapTy = Type::getInt16PtrTy(C->getContext(), AS);
else if (LoadTy->isFloatTy())
- MapTy = Type::getInt32PtrTy(C->getContext());
+ MapTy = Type::getInt32PtrTy(C->getContext(), AS);
else if (LoadTy->isDoubleTy())
- MapTy = Type::getInt64PtrTy(C->getContext());
+ MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
- MapTy = IntegerType::get(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy));
- MapTy = PointerType::getUnqual(MapTy);
+ MapTy = PointerType::getIntNPtrTy(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy),
+ AS);
} else
return 0;
@@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
- if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+ if (BytesLoaded > 32 || BytesLoaded == 0)
+ return 0;
GlobalValue *GVal;
- APInt Offset(TD.getPointerSizeInBits(), 0);
+ APInt Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
@@ -422,7 +435,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
- if (Offset.isNegative()) return 0;
+ if (Offset.isNegative())
+ return 0;
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
@@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
- ResultVal |= RawBytes[BytesLoaded-1-i];
+ ResultVal |= RawBytes[BytesLoaded - 1 - i];
}
} else {
ResultVal = RawBytes[0];
@@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (!CE) return 0;
+ if (!CE)
+ return 0;
if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (Constant *V =
ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
return V;
+ }
+ }
}
// Instead of loading constant c string, use corresponding integer value
@@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub && DL) {
GlobalValue *GV1, *GV2;
- unsigned PtrSize = DL->getPointerSizeInBits();
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
- APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
+ APInt Offs1, Offs2;
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
GV1 == GV2) {
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
// first.
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
- if (!TD) return 0;
- Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ if (!TD)
+ return 0;
+
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- Ops.slice(1, i-1)))) &&
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ Ops[0]->getType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
} else
NewIdxs.push_back(Ops[i]);
}
- if (!Any) return 0;
- Constant *C =
- ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (!Any)
+ return 0;
+
+ Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
+ }
+
return C;
}
@@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
OldPtrTy->getAddressSpace());
- Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy);
}
return Ptr;
}
@@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return 0;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 &&
- cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((CE == 0 || CE->getType() == IntPtrTy) &&
"CastGEPIndices didn't canonicalize index types!");
@@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
  // Do not try to incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
@@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
APInt BasePtr(BitWidth, 0);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ }
+ }
+
if (Ptr->isNullValue() || BasePtr != 0) {
- Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Also, this helps GlobalOpt do SROA on GlobalVariables.
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
- SmallVector<Constant*, 32> NewIdxs;
+ SmallVector<Constant *, 32> NewIdxs;
+
do {
if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
if (ATy->isPointerTy()) {
@@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// We've reached some non-indexable type.
break;
}
- } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+ } while (Ty != ResultElementTy);
// If we haven't used up the entire offset by descending the static
// type, then the offset is pointing into the middle of an indivisible
@@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return 0;
// Create a GEP.
- Constant *C =
- ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
- assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
- if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ if (Ty != ResultElementTy)
C = FoldBitCast(C, ResultTy, *TD);
return C;
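For illustration (not from the patch): SymbolicallyEvaluateGEP turns a flat byte offset back into typed indices. The struct is hypothetical; assuming 4-byte ints, &P[3].b lives at byte offset 28, which the descent above rewrites as the index sequence (0, 3, 1):

    struct Pair { int a, b; }; // hedged sketch, 8 bytes per element
    static Pair P[8];

    int *field() {
      return &P[3].b; // emitted as a typed GEP, not a raw 28-byte offset
    }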
@@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
- if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
cast<Constant>(IVI->getAggregateOperand()),
cast<Constant>(IVI->getInsertedValueOperand()),
IVI->getIndices());
+ }
- if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) {
return ConstantExpr::getExtractValue(
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
@@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
- if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
+ }
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
}
@@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ if (PtrWidth < InWidth) {
Constant *Mask =
- ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ ConstantInt::get(CE->getContext(),
+ APInt::getLowBitsSet(InWidth, PtrWidth));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
- // the int size is >= the ptr size. This requires knowing the width of a
- // pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ // the int size is >= the ptr size and the address spaces are the same.
+ // This requires knowing the width of a pointer, so it can't be done in
+ // ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *SrcPtr = CE->getOperand(0);
+ unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
+
+ if (MidIntSize >= SrcPtrSize) {
+ unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
+ if (SrcAS == DestTy->getPointerAddressSpace())
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ }
+ }
+ }
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
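For illustration (not from the patch): the C++ shape of the inttoptr(ptrtoint X) pattern the IntToPtr case above now folds to a bitcast, provided the intermediate integer is at least pointer-sized and the address spaces agree; whether a given front end emits exactly this constant expression is an assumption:

    static double D; // hypothetical global

    double *roundTrip() {
      unsigned long long Bits = (unsigned long long)&D; // ptrtoint
      return (double *)Bits; // inttoptr; folds to a cast of &D
    }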
@@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
+ case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
@@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (TD && Ops1->isNullValue()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if (CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy) {
- Constant *C = CE0->getOperand(0);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
-
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if ((CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
- return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
+ return ConstantFoldCompareInstOperands(Predicate,
+ CE0->getOperand(0),
+ CE1->getOperand(0),
+ TD,
+ TLI);
+ }
+ }
}
}
@@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
// addressing.
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
C = C->getAggregateElement(CE->getOperand(i));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// addressing.
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
C = C->getAggregateElement(Indices[i]);
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
/// canConstantFoldCallTo - Return true if its even possible to fold a call to
/// the specified function.
-bool
-llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
case Intrinsic::log:
@@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) {
case 0: break;
}
- if (!F->hasName()) return false;
+ if (!F->hasName())
+ return false;
StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
@@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
  // All of these conversion intrinsics form an integer of at most 64 bits.
- unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
"Can only constant fold conversions to 64 and 32 bit ints");
@@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (!F->hasName()) return 0;
+ if (!F->hasName())
+ return 0;
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
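A hedged sketch (not from the patch) of how a client drives these two entry points; the wrapper name tryFoldCall is invented, while the two calls and their signatures are the ones shown above:

    #include "llvm/Analysis/ConstantFolding.h"

    llvm::Constant *tryFoldCall(llvm::Function *F,
                                llvm::ArrayRef<llvm::Constant *> Args,
                                const llvm::TargetLibraryInfo *TLI) {
      if (!llvm::canConstantFoldCallTo(F)) // not a foldable intrinsic/libcall
        return 0;
      // Returns the folded constant, or 0 if these arguments do not fold.
      return llvm::ConstantFoldCall(F, Args, TLI);
    }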
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 98a7780..f943258 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -19,6 +19,7 @@
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -26,10 +27,15 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
+
namespace {
class CostModelAnalysis : public FunctionPass {
@@ -81,7 +87,7 @@ CostModelAnalysis::runOnFunction(Function &F) {
return false;
}
-static bool isReverseVectorMask(SmallVector<int, 16> &Mask) {
+static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
return false;
@@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
return OpInfo;
}
+static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
+ if (M1.size() != M2.size())
+ return false;
+
+ for (unsigned i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] != M2[i])
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ if (!matchMask(Mask, ActualMask))
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
+ unsigned Level, unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (BinOp == 0)
+ return false;
+
+ assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
+
+ unsigned Opcode = BinOp->getOpcode();
+ Value *L = BinOp->getOperand(0);
+ Value *R = BinOp->getOperand(1);
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
+ if (!LS && Level)
+ return false;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
+ if (!RS && Level)
+ return false;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return false;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
+ Value *NextLevelOp = 0;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return false;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+  // input to the other shufflevector <1, undef> must match one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != R)
+ return false;
+ else if (NextLevelOpR && NextLevelOpR != L)
+ return false;
+
+ NextLevelOp = NextLevelOpL ? R : L;
+ } else
+ return false;
+
+  // Check that the next level's binary operation exists and matches the
+  // current one.
+ BinaryOperator *NextLevelBinOp = 0;
+ if (Level + 1 != NumLevels) {
+ if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
+ return false;
+ else if (NextLevelBinOp->getOpcode() != Opcode)
+ return false;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, false, Level))
+ return false;
+ } else if (matchPairwiseShuffleMask(RS, true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, false, Level))
+ return false;
+ } else
+ return false;
+
+ if (++Level == NumLevels)
+ return true;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
+}
+
+static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+ // We look for a sequence of shuffle,shuffle,add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
+ return false;
+
+ Opcode = RdxStart->getOpcode();
+ Ty = VecTy;
+
+ return true;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(BinaryOperator *B) {
+
+ Value *L = B->getOperand(0);
+ Value *R = B->getOperand(1);
+ ShuffleVectorInst *S = 0;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+ unsigned RdxOpcode = RdxStart->getOpcode();
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one fadd/shufflevector pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Value *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+ while (NumVecElemsRemain - 1) {
+ // Check for the right reduction operation.
+ BinaryOperator *BinOp;
+ if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
+ return false;
+ if (BinOp->getOpcode() != RdxOpcode)
+ return false;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+
+    // Check that the current reduction operation and the shuffle use the same value.
+ if (Shuffle == 0)
+ return false;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return false;
+
+    // Check that the shuffle masks match.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (!matchMask(ShuffleMask, Mask))
+ return false;
+
+ RdxOp = NextRdxOp;
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RdxOpcode;
+ Ty = VecTy;
+ return true;
+}
+
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (!TTI)
return -1;
@@ -189,18 +449,29 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
+
+ // Try to match a reduction sequence (series of shufflevector and vector
+      // adds followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
+ else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
+
return TTI->getVectorInstrCost(I->getOpcode(),
EEI->getOperand(0)->getType(), Idx);
}
case Instruction::InsertElement: {
- const InsertElementInst * IE = cast<InsertElementInst>(I);
- ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = -1;
- if (CI)
- Idx = CI->getZExtValue();
- return TTI->getVectorInstrCost(I->getOpcode(),
- IE->getType(), Idx);
- }
+ const InsertElementInst * IE = cast<InsertElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ IE->getType(), Idx);
+ }
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
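A hedged illustration (not from the patch): the scalar loop below is the kind of reduction whose 4-wide vectorized form is the shufflevector/fadd tree the new matchers recognize; note they only run under the new, off-by-default -costmodel-reduxcost flag:

    float sumReduce(const float *X) { // hypothetical example
      float S = 0.0f;
      for (int i = 0; i != 4; ++i)
        S += X[i]; // vectorizes to shuffles + fadds + extractelement 0
      return S;
    }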
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
new file mode 100644
index 0000000..3ed0609
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -0,0 +1,133 @@
+//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class Delinearization : public FunctionPass {
+ Delinearization(const Delinearization &); // do not implement
+protected:
+ Function *F;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ Delinearization() : FunctionPass(ID) {
+ initializeDelinearizationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void print(raw_ostream &O, const Module *M = 0) const;
+};
+
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+static Value *getPointerOperand(Instruction &Inst) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
+ return Load->getPointerOperand();
+ else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
+ return Store->getPointerOperand();
+ else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
+ return Gep->getPointerOperand();
+ return NULL;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ O << "Delinearization on function " << F->getName() << ":\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &(*I);
+
+ // Only analyze loads and stores.
+ if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) &&
+ !isa<GetElementPtrInst>(Inst))
+ continue;
+
+ const BasicBlock *BB = Inst->getParent();
+ // Delinearize the memory access as analyzed in all the surrounding loops.
+ // Do not analyze memory accesses outside loops.
+ for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
+
+ // Do not try to delinearize memory accesses that are not AddRecs.
+ if (!AR)
+ break;
+
+ O << "AddRec: " << *AR << "\n";
+
+ SmallVector<const SCEV *, 3> Subscripts, Sizes;
+ const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
+ int Size = Subscripts.size();
+ if (Res == AR || Size == 0) {
+ O << "failed to delinearize\n";
+ continue;
+ }
+ O << "Base offset: " << *Res << "\n";
+ O << "ArrayDecl[UnknownSize]";
+ for (int i = 0; i < Size - 1; i++)
+ O << "[" << *Sizes[i] << "]";
+ O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
+
+ O << "ArrayRef";
+ for (int i = 0; i < Size; i++)
+ O << "[" << *Subscripts[i] << "]";
+ O << "\n";
+ }
+ }
+}
+
+char Delinearization::ID = 0;
+static const char delinearization_name[] = "Delinearization";
+INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
+ true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
+
+FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
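A hedged example (not from the patch) of an access the pass can report on: the single flat SCEV for A[i * m + j] is split back into two subscripts with an inner dimension of size m:

    void scale(double *A, long n, long m) { // hypothetical example
      for (long i = 0; i < n; ++i)
        for (long j = 0; j < m; ++j)
          A[i * m + j] *= 2.0; // delinearized to the subscripts [i][j]
    }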
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index cbc71bd..3b3e2ef 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -24,11 +24,11 @@
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
-// The implementation depends on the GEP instruction to
-// differentiate subscripts. Since Clang linearizes subscripts
-// for most arrays, we give up some precision (though the existing MIV tests
-// will help). We trust that the GEP instruction will eventually be extended.
-// In the meantime, we should explore Maslov's ideas about delinearization.
+// The implementation depends on the GEP instruction to differentiate
+// subscripts. Since Clang linearizes some array subscripts, the dependence
+// analysis uses SCEV->delinearize to recover the representation of multiple
+// subscripts and thus avoids the more expensive and less precise MIV tests. The
+// delinearization is controlled by the flag -da-delinearize.
//
// We should pay some careful attention to the possibility of integer overflow
// in the implementation of the various tests. This could happen with Add,
@@ -61,6 +61,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
@@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+static cl::opt<bool>
+Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -508,7 +513,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,
APInt Xr = Xtop; // though they're just going to be overwritten
APInt::sdivrem(Xtop, Xbot, Xq, Xr);
APInt Yq = Ytop;
- APInt Yr = Ytop;;
+ APInt Yr = Ytop;
APInt::sdivrem(Ytop, Ybot, Yq, Yr);
if (Xr != 0 || Yr != 0) {
X->setEmpty();
@@ -2951,6 +2956,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
AddRec->getLoop(),
AddRec->getNoWrapFlags());
}
+ if (SE->isLoopInvariant(AddRec, TargetLoop))
+ return SE->getAddRecExpr(AddRec,
+ Value,
+ TargetLoop,
+ SCEV::FlagAnyWrap);
return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
TargetLoop, Value),
AddRec->getStepRecurrence(*SE),
@@ -2972,7 +2982,7 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
bool DependenceAnalysis::propagate(const SCEV *&Src,
const SCEV *&Dst,
SmallBitVector &Loops,
- SmallVector<Constraint, 4> &Constraints,
+ SmallVectorImpl<Constraint> &Constraints,
bool &Consistent) {
bool Result = false;
for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
@@ -3166,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
llvm_unreachable("constraint has unexpected kind");
}
+/// Check if we can delinearize the subscripts. If the SCEVs representing the
+/// source and destination array references are recurrences on a nested loop,
+/// this function flattens the nested recurrences into separate recurrences
+/// for each loop level.
+bool
+DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
+ SmallVectorImpl<Subscript> &Pair) const {
+ const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
+ const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
+ if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
+ return false;
+
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
+ SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
+ DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+
+ int size = SrcSubscripts.size();
+ int dstSize = DstSubscripts.size();
+ if (size != dstSize || size < 2)
+ return false;
+
+#ifndef NDEBUG
+ DEBUG(errs() << "\nSrcSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *SrcSubscripts[i]);
+ DEBUG(errs() << "\nDstSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *DstSubscripts[i]);
+#endif
+
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(size);
+ for (int i = 0; i < size; ++i) {
+ Pair[i].Src = SrcSubscripts[i];
+ Pair[i].Dst = DstSubscripts[i];
+
+ // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
+ // delinearization has found, and add these constraints to the dependence
+ // check to avoid memory accesses overflow from one dimension into another.
+ // This is related to the problem of determining the existence of data
+ // dependences in array accesses using a different number of subscripts: in
+ // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc.
+ }
+
+ return true;
+}
//===----------------------------------------------------------------------===//
@@ -3275,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << "    delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3693,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << "    delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
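A hedged sketch (not from the patch) of a query that benefits: with -da-delinearize, the single linearized MIV pair for these two references becomes the SIV pairs (i, i) and (j, j - 1), which the cheaper SIV tests can decide:

    void shift(double *A, long n, long m) { // hypothetical example
      for (long i = 0; i < n; ++i)
        for (long j = 1; j < m; ++j)
          A[i * m + j] = A[i * m + (j - 1)];
    }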
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
index 7620fd9..f042964 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -6,11 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the CallGraph class and provides the BasicCallGraph
-// default implementation.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Instructions.h"
@@ -21,168 +16,92 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
+CallGraph::CallGraph()
+ : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeCallGraphPass(*PassRegistry::getPassRegistry());
+}
-//===----------------------------------------------------------------------===//
-// BasicCallGraph class definition
-//
-class BasicCallGraph : public ModulePass, public CallGraph {
- // Root is root of the call graph, or the external node if a 'main' function
- // couldn't be found.
- //
- CallGraphNode *Root;
-
- // ExternalCallingNode - This node has edges to all external functions and
- // those internal functions that have their address taken.
- CallGraphNode *ExternalCallingNode;
-
- // CallsExternalNode - This node has edges to it from all functions making
- // indirect calls or calling an external function.
- CallGraphNode *CallsExternalNode;
-
-public:
- static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {
- initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
- }
+void CallGraph::addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
- // runOnModule - Compute the call graph for the specified module.
- virtual bool runOnModule(Module &M) {
- CallGraph::initialize(M);
-
- ExternalCallingNode = getOrInsertFunction(0);
- CallsExternalNode = new CallGraphNode(0);
- Root = 0;
-
- // Add every function to the call graph.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- addToCallGraph(I);
-
- // If we didn't find a main function, use the external call graph node
- if (Root == 0) Root = ExternalCallingNode;
-
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
- virtual void print(raw_ostream &OS, const Module *) const {
- OS << "CallGraph Root is: ";
- if (Function *F = getRoot()->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << getRoot() << ">>\n";
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
}
-
- CallGraph::print(OS, 0);
}
- virtual void releaseMemory() {
- destroy();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &CallGraph::ID)
- return (CallGraph*)this;
- return this;
- }
-
- CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
- CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
-
- // getRoot - Return the root of the call graph, which is either main, or if
- // main cannot be found, the external node.
- //
- CallGraphNode *getRoot() { return Root; }
- const CallGraphNode *getRoot() const { return Root; }
-
-private:
- //===---------------------------------------------------------------------
- // Implementation of CallGraph construction
- //
-
- // addToCallGraph - Add a function to the call graph, and link the node to all
- // of the functions that it calls.
- //
- void addToCallGraph(Function *F) {
- CallGraphNode *Node = getOrInsertFunction(F);
-
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
+}
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // If this function is not defined in this translation unit, it could call
- // anything.
- if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
-
- // Look for calls by this function.
- for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
- CallSite CS(cast<Value>(II));
- if (CS) {
- const Function *Callee = CS.getCalledFunction();
- if (!Callee)
- // Indirect calls of intrinsics are not allowed so no need to check.
- Node->addCalledFunction(CS, CallsExternalNode);
- else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- }
- }
- }
+void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
- //
- // destroy - Release memory for the call graph
- virtual void destroy() {
- /// CallsExternalNode is not in the function map, delete it explicitly.
- if (CallsExternalNode) {
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
- CallsExternalNode = 0;
- }
- CallGraph::destroy();
- }
-};
+bool CallGraph::runOnModule(Module &M) {
+ Mod = &M;
-} //End anonymous namespace
+ ExternalCallingNode = getOrInsertFunction(0);
+ assert(!CallsExternalNode);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
-INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
-INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true)
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
-char CallGraph::ID = 0;
-char BasicCallGraph::ID = 0;
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0)
+ Root = ExternalCallingNode;
-void CallGraph::initialize(Module &M) {
- Mod = &M;
+ return false;
}
-void CallGraph::destroy() {
- if (FunctionMap.empty()) return;
-
- // Reset all node's use counts to zero before deleting them to prevent an
- // assertion from firing.
+INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
+
+char CallGraph::ID = 0;
+
+void CallGraph::releaseMemory() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+
+ if (FunctionMap.empty())
+ return;
+
+// Reset all nodes' use counts to zero before deleting them to prevent an
+// assertion from firing.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
@@ -195,7 +114,14 @@ void CallGraph::destroy() {
FunctionMap.clear();
}
-void CallGraph::print(raw_ostream &OS, Module*) const {
+void CallGraph::print(raw_ostream &OS, const Module*) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = Root->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << Root << ">>\n";
+ }
+
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
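A hedged sketch (not from the patch) of a consumer of the reworked pass: CallGraph is now a concrete ModulePass rather than an analysis group, so clients require it directly and can walk the function map the same way print() does above; listEdges is an invented name:

    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/Support/raw_ostream.h"

    void listEdges(llvm::CallGraph &CG, llvm::raw_ostream &OS) {
      for (llvm::CallGraph::const_iterator I = CG.begin(), E = CG.end();
           I != E; ++I)
        I->second->print(OS); // one CallGraphNode per function
    }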
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
index a0d788f..182beca 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/PassManagers.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 92d0d23..7ec4644 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
index 1c1816d..47357cf 100644
--- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
/// initializeIPA - Initialize all passes linked into the IPA library.
void llvm::initializeIPA(PassRegistry &Registry) {
- initializeBasicCallGraphPass(Registry);
- initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
index 35c45e6..3bc796e 100644
--- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
@@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -124,7 +126,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitIntToPtr(IntToPtrInst &I);
bool visitCastInst(CastInst &I);
bool visitUnaryInstruction(UnaryInstruction &I);
- bool visitICmp(ICmpInst &I);
+ bool visitCmpInst(CmpInst &I);
bool visitSub(BinaryOperator &I);
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoad(LoadInst &I);
@@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -490,7 +499,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
return false;
}
-bool CallAnalyzer::visitICmp(ICmpInst &I) {
+bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
if (!isa<Constant>(LHS))
@@ -499,12 +508,16 @@ bool CallAnalyzer::visitICmp(ICmpInst &I) {
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
SimplifiedValues[&I] = C;
return true;
}
+ }
+
+ if (I.getOpcode() == Instruction::FCmp)
+ return false;
// Otherwise look for a comparison between constant offset pointers with
// a common base.
@@ -700,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -781,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+ // We never want to inline functions that contain an indirectbr. This is
+  // incorrect because all the blockaddresses (in static global initializers
+ // for example) would be referring to the original function, and this
+ // indirect jump would jump from the inlined copy of the function into the
+ // original function which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+  // FIXME: It might be reasonable to discount the cost of instructions leading
+ // to unreachable as they have the lowest possible impact on both runtime and
+ // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@@ -804,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -821,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
    // If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -985,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1035,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1074,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -1167,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}
+/// \brief Test that two functions either both have or both lack the given
+/// attribute.
+static bool attributeMatches(Function *F1, Function *F2,
+ Attribute::AttrKind Attr) {
+ return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+}
+
+/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// that prevent inlining.
+static bool functionsHaveCompatibleAttributes(Function *Caller,
+ Function *Callee) {
+ return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+}
+
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
int Threshold) {
// Cannot inline indirect calls.
@@ -1175,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)) {
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
return llvm::InlineCost::getAlways();
return llvm::InlineCost::getNever();
}
+ // Never inline functions with conflicting attributes (unless callee has
+ // always-inline attribute).
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline this call if the caller has the optnone attribute.
+ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ return llvm::InlineCost::getNever();
+
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (Callee->mayBeOverridden() ||
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) ||
- CS.isNoInline())
+ Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
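A hedged illustration (not from the patch) of the new sanitizer-attribute check: a callee opted out of AddressSanitizer no longer inlines into an instrumented caller, since the sanitize attributes of the two must match; the function names are invented:

    __attribute__((no_sanitize_address))
    static int rawLoad(int *P) { return *P; } // lacks sanitize_address

    int checkedLoad(int *P) { // instrumented when built with ASan
      return rawLoad(P);      // conflicting attributes -> never inlined
    }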
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index bf77451..b867af1 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
- Value *&V) {
+ Value *&V,
+ bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
// Without DataLayout, just be conservative for now. Theoretically, more could
@@ -676,8 +677,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
if (!TD)
return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
- APInt Offset = APInt::getNullValue(IntPtrWidth);
+ Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType();
+ APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
+ if ((!AllowNonInbounds && !GEP->isInBounds()) ||
+ !GEP->accumulateConstantOffset(*TD, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -701,7 +703,6 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
"Unexpected operand type!");
} while (Visited.insert(V));
- Type *IntPtrTy = TD->getIntPtrType(V->getContext());
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
if (V->getType()->isVectorTy())
return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
@@ -1363,6 +1364,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
return V;
+ // X >> X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
// undef >>l X -> 0
if (match(Op0, m_Undef()))
return Constant::getNullValue(Op0->getType());
@@ -1391,6 +1396,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
return V;
+ // X >> X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
// all ones >>a X -> all ones
if (match(Op0, m_AllOnes()))
return Op0;
@@ -1730,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD,
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -1830,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD,
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
+
+ // Even if a non-inbounds GEP occurs along the path we can still optimize
+ // equality comparisons concerning the result. We avoid walking the whole
+ // chain again by starting where the last calls to
+ // stripAndComputeConstantOffsets left off and accumulating the offsets.
+ Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true);
+ Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true);
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred,
+ ConstantExpr::getAdd(LHSOffset, LHSNoBound),
+ ConstantExpr::getAdd(RHSOffset, RHSNoBound));
}
// Otherwise, fail.
@@ -2026,7 +2046,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
- Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
@@ -2238,6 +2258,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // icmp pred (urem X, Y), Y
if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
bool KnownNonNegative, KnownNegative;
switch (Pred) {
@@ -2245,7 +2266,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2255,7 +2276,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2265,6 +2286,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
}
+
+ // icmp pred X, (urem Y, X)
if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
bool KnownNonNegative, KnownNegative;
switch (Pred) {
@@ -2272,7 +2295,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2282,7 +2305,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -2936,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
return true;
}
}
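
The new X >> X folds rest on a small arithmetic fact: every non-negative x is
strictly less than 2^x, so shifting a value right by itself yields zero
whenever the shift amount is in range, and an out-of-range shift amount is
undefined in LLVM IR, so folding those to zero is sound as well. A quick
standalone check of the identity, independent of the code above:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For 0 <= x < 32 we have x < 2^x, hence (x >> x) == 0 on 32-bit values.
      // Shift counts of 32 or more are undefined in C (poison in LLVM IR),
      // which is exactly why the fold may still produce zero there.
      for (uint32_t X = 0; X < 32; ++X)
        assert((X >> X) == 0);
      return 0;
    }
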
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index 66b5e85..b6970af 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -421,8 +421,8 @@ void LVIValueHandle::deleted() {
if (I->second == getValPtr())
ToErase.push_back(*I);
}
-
- for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+
+ for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
E = ToErase.end(); I != E; ++I)
Parent->OverDefinedCache.erase(*I);
@@ -444,8 +444,8 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
if (I->first == BB)
ToErase.push_back(*I);
}
-
- for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+
+ for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
E = ToErase.end(); I != E; ++I)
OverDefinedCache.erase(*I);
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 9393508..ec17f47 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) {
&I);
FunctionType *FT = F->getFunctionType();
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumActualArgs = CS.arg_size();
Assert1(FT->isVarArg() ?
FT->getNumParams() <= NumActualArgs :
@@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, DataLayout *TD) {
+static bool isZero(Value *V, DataLayout *DL) {
// Assume undef could be zero.
- if (isa<UndefValue>(V)) return true;
+ if (isa<UndefValue>(V))
+ return true;
+
+ VectorType *VecTy = dyn_cast<VectorType>(V->getType());
+ if (!VecTy) {
+ unsigned BitWidth = V->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ return KnownZero.isAllOnesValue();
+ }
+
+ // Per-component check doesn't work with zeroinitializer
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (C->isZeroValue())
+ return true;
+
+ // For a vector, KnownZero will only be true if all values are zero, so check
+ // this per component
+ unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
+ for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ Constant *Elem = C->getAggregateElement(I);
+ if (isa<UndefValue>(Elem))
+ return true;
+
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+ if (KnownZero.isAllOnesValue())
+ return true;
+ }
- unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
- return KnownZero.isAllOnesValue();
+ return false;
}
void Lint::visitSDiv(BinaryOperator &I) {
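
The reworked isZero() above checks vector divisors lane by lane: a single
provably-zero or undef lane is enough to flag the division. A rough standalone
model of that per-lane rule, using std::optional to stand in for undef lanes
rather than LLVM's Constant machinery (illustrative names, not the Lint API):

    #include <cassert>
    #include <optional>
    #include <vector>

    // A lane is 'undef' when the optional is empty; otherwise it holds a value.
    using Lane = std::optional<int>;

    // Mirrors the vector path of isZero(): any undef or zero lane makes the
    // whole divisor suspicious, since dividing by that lane is undefined.
    static bool anyLaneMaybeZero(const std::vector<Lane> &V) {
      for (const Lane &L : V)
        if (!L || *L == 0)
          return true;
      return false;
    }

    int main() {
      assert(anyLaneMaybeZero({{1}, {0}, {3}}));      // explicit zero lane
      assert(anyLaneMaybeZero({{1}, std::nullopt}));  // undef lane counts too
      assert(!anyLaneMaybeZero({{1}, {2}, {3}}));     // all lanes non-zero
      return 0;
    }
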
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index f1ad650..e369633 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -50,6 +50,9 @@ INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+// Loop identifier metadata name.
+static const char *const LoopMDName = "llvm.loop";
+
//===----------------------------------------------------------------------===//
// Loop implementation
//
@@ -174,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
-
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
@@ -193,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
// block they are defined in. Also, blocks not reachable from the
// entry are special; uses in them don't need to go through PHIs.
if (UserBB != BB &&
- !LoopBBs.count(UserBB) &&
+ !contains(UserBB) &&
DT.isReachableFromEntry(UserBB))
return false;
}
@@ -217,12 +216,12 @@ bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
if (II->hasFnAttr(Attribute::NoDuplicate))
return false;
- }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
@@ -234,14 +233,62 @@ bool Loop::isSafeToClone() const {
return true;
}
-bool Loop::isAnnotatedParallel() const {
+MDNode *Loop::getLoopID() const {
+ MDNode *LoopID = 0;
+ if (isLoopSimplifyForm()) {
+ LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName);
+ } else {
+ // Go through each predecessor of the loop header and check the
+ // terminator for the metadata.
+ BasicBlock *H = getHeader();
+ for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ MDNode *MD = 0;
+
+ // Check if this terminator branches to the loop header.
+ for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
+ if (TI->getSuccessor(i) == H) {
+ MD = TI->getMetadata(LoopMDName);
+ break;
+ }
+ }
+ if (!MD)
+ return 0;
- BasicBlock *latch = getLoopLatch();
- if (latch == NULL)
- return false;
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return 0;
+ }
+ }
+ if (!LoopID || LoopID->getNumOperands() == 0 ||
+ LoopID->getOperand(0) != LoopID)
+ return 0;
+ return LoopID;
+}
- MDNode *desiredLoopIdMetadata =
- latch->getTerminator()->getMetadata("llvm.loop.parallel");
+void Loop::setLoopID(MDNode *LoopID) const {
+ assert(LoopID && "Loop ID should not be null");
+ assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+
+ if (isLoopSimplifyForm()) {
+ getLoopLatch()->getTerminator()->setMetadata(LoopMDName, LoopID);
+ return;
+ }
+
+ BasicBlock *H = getHeader();
+ for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
+ if (TI->getSuccessor(i) == H)
+ TI->setMetadata(LoopMDName, LoopID);
+ }
+ }
+}
+
+bool Loop::isAnnotatedParallel() const {
+ MDNode *desiredLoopIdMetadata = getLoopID();
if (!desiredLoopIdMetadata)
return false;
@@ -258,15 +305,15 @@ bool Loop::isAnnotatedParallel() const {
if (!II->mayReadOrWriteMemory())
continue;
- if (!II->getMetadata("llvm.mem.parallel_loop_access"))
- return false;
-
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD =
- dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+
+ if (!loopIdMD)
+ return false;
+
bool loopIdMDFound = false;
for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
@@ -286,9 +333,6 @@ bool Loop::isAnnotatedParallel() const {
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
@@ -296,7 +340,7 @@ bool Loop::hasDedicatedExits() const {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
for (pred_iterator PI = pred_begin(ExitBlocks[i]),
PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
- if (!LoopBBs.count(*PI))
+ if (!contains(*PI))
return false;
// All the requirements are met.
return true;
@@ -311,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
assert(hasDedicatedExits() &&
"getUniqueExitBlocks assumes the loop has canonical form exits!");
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
- std::sort(LoopBBs.begin(), LoopBBs.end());
-
SmallVector<BasicBlock *, 32> switchExitBlocks;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
@@ -325,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
// If the block is inside the loop then it is not an exit block.
- if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ if (contains(*I))
continue;
pred_iterator PI = pred_begin(*I);
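
getLoopID() and setLoopID() above centralize the convention that a loop ID is
a metadata node whose first operand is the node itself. A tiny model of that
validity test, with a hypothetical Node struct standing in for llvm::MDNode:

    #include <cassert>
    #include <vector>

    // Hypothetical stand-in for llvm::MDNode: just a list of operand pointers.
    struct Node {
      std::vector<const Node *> Operands;
    };

    // Mirrors the validity test in getLoopID(): a usable loop ID must be
    // non-null, non-empty, and refer to itself through operand 0.
    static bool isValidLoopID(const Node *ID) {
      return ID && !ID->Operands.empty() && ID->Operands[0] == ID;
    }

    int main() {
      Node Good;
      Good.Operands.push_back(&Good);  // first operand points back at the node
      Node Bad;
      Bad.Operands.push_back(&Good);   // points elsewhere: not a loop ID
      assert(isValidLoopID(&Good));
      assert(!isValidLoopID(&Bad));
      assert(!isValidLoopID(nullptr));
      return 0;
    }
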
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 1540112..acf2ba6 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -188,6 +188,10 @@ bool LPPassManager::runOnFunction(Function &F) {
// advantage in deleting uses in a later loop before optimizing the
// definitions in an earlier loop. If we find a clear reason to process in
// forward order, then a forward variant of LoopPassManager should be created.
+ //
+ // Note that LoopInfo::iterator visits loops in reverse program
+ // order. Here, reverse_iterator gives us a forward order, and the LoopQueue
+ // reverses the order a third time by popping from the back.
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
addLoopIntoQueue(*I, LQ);
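
The ordering note added above is subtle enough to merit a worked example. A
self-contained sketch, assuming (as the comment states) that LoopInfo's own
order is reverse program order:

    #include <cassert>
    #include <deque>
    #include <vector>

    int main() {
      // Suppose LoopInfo hands us loops in reverse program order: C, B, A.
      std::vector<char> LoopInfoOrder = {'C', 'B', 'A'};

      // runOnFunction() walks this with a reverse_iterator, queuing A, B, C.
      std::deque<char> LQ;
      for (auto I = LoopInfoOrder.rbegin(), E = LoopInfoOrder.rend(); I != E; ++I)
        LQ.push_back(*I);

      // The pass manager pops from the back, so loops come off as C, B, A:
      // the third reversal restores reverse program order.
      std::vector<char> Processed;
      while (!LQ.empty()) {
        Processed.push_back(LQ.back());
        LQ.pop_back();
      }
      assert((Processed == std::vector<char>{'C', 'B', 'A'}));
      return 0;
    }
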
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 9c0d8ac..1db0f63 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -31,12 +31,13 @@
using namespace llvm;
enum AllocType {
- MallocLike = 1<<0, // allocates
- CallocLike = 1<<1, // allocates + bzero
- ReallocLike = 1<<2, // reallocates
- StrDupLike = 1<<3,
+ OpNewLike = 1<<0, // allocates; never returns null
+ MallocLike = 1<<1 | OpNewLike, // allocates; may return null
+ CallocLike = 1<<2, // allocates + bzero
+ ReallocLike = 1<<3, // reallocates
+ StrDupLike = 1<<4,
AllocLike = MallocLike | CallocLike | StrDupLike,
- AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike
+ AnyAlloc = AllocLike | ReallocLike
};
struct AllocFnsTy {
@@ -52,20 +53,20 @@ struct AllocFnsTy {
static const AllocFnsTy AllocationFnData[] = {
{LibFunc::malloc, MallocLike, 1, 0, -1},
{LibFunc::valloc, MallocLike, 1, 0, -1},
- {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int)
{LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long)
{LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int)
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
- {LibFunc::posix_memalign, MallocLike, 3, 2, -1},
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
{LibFunc::strdup, StrDupLike, 1, -1, -1},
{LibFunc::strndup, StrDupLike, 2, 1, -1}
+ // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -77,6 +78,9 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (!CS.getInstruction())
return 0;
+ if (CS.isNoBuiltin())
+ return 0;
+
Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
return 0;
@@ -114,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
return 0;
const AllocFnsTy *FnData = &AllocationFnData[i];
- if ((FnData->AllocTy & AllocTy) == 0)
+ if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return 0;
// Check function prototype.
@@ -186,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
}
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory and never returns null (such as operator new).
+bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -194,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
+static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -202,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !TD)
+ if (!T || !T->isSized() || !DL)
return 0;
- unsigned ElementSize = TD->getTypeAllocSize(T);
+ unsigned ElementSize = DL->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -224,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *TD,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI) {
const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, TD, TLI);
+ Value *ArraySize = computeArraySize(CI, DL, TLI);
if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
if (ConstSize->isOne())
@@ -285,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
- return computeArraySize(CI, TD, TLI, LookThroughSExt);
+ return computeArraySize(CI, DL, TLI, LookThroughSExt);
}
@@ -315,9 +326,15 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return 0;
- if (TLIFn != LibFunc::free &&
- TLIFn != LibFunc::ZdlPv && // operator delete(void*)
- TLIFn != LibFunc::ZdaPv) // operator delete[](void*)
+ unsigned ExpectedNumParams;
+ if (TLIFn == LibFunc::free ||
+ TLIFn == LibFunc::ZdlPv || // operator delete(void*)
+ TLIFn == LibFunc::ZdaPv) // operator delete[](void*)
+ ExpectedNumParams = 1;
+ else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
+ ExpectedNumParams = 2;
+ else
return 0;
// Check free prototype.
@@ -326,7 +343,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
return 0;
- if (FTy->getNumParams() != 1)
+ if (FTy->getNumParams() != ExpectedNumParams)
return 0;
if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
return 0;
@@ -345,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!TD)
+ if (!DL)
return false;
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -377,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
- IntegerType *IntTy = TD->getIntPtrType(Context);
+: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ IntegerType *IntTy = DL->getIntPtrType(Context);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
}
@@ -425,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -444,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -517,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -533,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -569,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context)
-: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
- IntTy = TD->getIntPtrType(Context);
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
+ IntTy = DL->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
}
@@ -598,7 +616,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -617,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
- // record the pointers that were handled in this run, so that they can be
- // cleaned later if something fails
- SeenVals.insert(V);
-
// now compute the size and offset
SizeOffsetEvalType Result;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+
+ // Record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails. We also use this set to break cycles that
+ // can occur in dead code.
+ if (!SeenVals.insert(V)) {
+ Result = unknown();
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
Result = visit(*I);
@@ -656,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- TD->getTypeAllocSize(I.getAllocatedType()));
+ DL->getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -708,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
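
The flag rework above makes MallocLike a strict superset of OpNewLike, and the
test in getAllocationData() changes from "any bit overlaps" to "all of the
function's bits lie within the query". A compact standalone check of why that
matters; the bit values copy the new enum, the rest is illustrative:

    #include <cassert>

    // Bit values as in the new AllocType enum above.
    enum AllocType {
      OpNewLike  = 1 << 0,            // allocates; never returns null
      MallocLike = 1 << 1 | OpNewLike // allocates; may return null
    };

    // New test: every bit the function carries must be allowed by the query.
    static bool matches(unsigned FnTy, unsigned Query) {
      return (FnTy & Query) == FnTy;
    }

    int main() {
      // A query for malloc-like behaviour accepts operator new as well...
      assert(matches(OpNewLike, MallocLike));
      // ...but a query for never-returns-null must not accept plain malloc,
      // which the old "any overlapping bit" test would have let through.
      assert(!matches(MallocLike, OpNewLike));
      assert((MallocLike & OpNewLike) != 0);  // the overlap the old test keyed on
      return 0;
    }
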
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index c0009cb..84ff2ee 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
TD = getAnalysisIfAvailable<DataLayout>();
DT = getAnalysisIfAvailable<DominatorTree>();
- if (PredCache == 0)
+ if (!PredCache)
PredCache.reset(new PredIteratorCache());
return false;
}
@@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ // Debug intrinsics don't (and can't) cause dependencies.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Debug intrinsics don't (and can't) cause dependences.
- if (isa<DbgInfoIntrinsic>(II)) continue;
-
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
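
Hoisting the debug-intrinsic check above the limit decrement has one
observable effect: debug intrinsics no longer consume the scan budget, so
compiling with -g can no longer change which dependency the limited scan
finds. A schematic sketch of the fixed loop shape (Item and scanned are
illustrative names, not LLVM types):

    #include <cassert>
    #include <vector>

    struct Item { bool IsDebug; };

    // Count how many real (non-debug) items a budget of Limit lets us examine.
    // Skipping debug items *before* the decrement, as in the fixed code,
    // makes the answer independent of how many debug items are interleaved.
    static int scanned(const std::vector<Item> &Block, int Limit) {
      int Seen = 0;
      for (const Item &I : Block) {
        if (I.IsDebug)
          continue;  // fixed order: free to skip, budget untouched
        if (--Limit < 0)
          break;
        ++Seen;
      }
      return Seen;
    }

    int main() {
      std::vector<Item> NoDebug = {{false}, {false}, {false}};
      std::vector<Item> WithDebug = {{false}, {true}, {true}, {false}, {false}};
      // Same budget, same number of real items examined either way.
      assert(scanned(NoDebug, 2) == scanned(WithDebug, 2));
      return 0;
    }
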
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
deleted file mode 100644
index 30d213b..0000000
--- a/contrib/llvm/lib/Analysis/PathNumbering.cpp
+++ /dev/null
@@ -1,521 +0,0 @@
-//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Ball-Larus path numbers uniquely identify paths through a directed acyclic
-// graph (DAG) [Ball96]. For a CFG, backedges are removed and replaced by phony
-// edges to obtain a DAG, and thus the unique path numbers [Ball96].
-//
-// The purpose of this analysis is to enumerate the edges in a CFG in order
-// to obtain paths from path numbers in a convenient manner. As described in
-// [Ball96], edges can be enumerated such that, given a path number, the path
-// is obtained by following the CFG and updating the path number.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ball-larus-numbering"
-
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <sstream>
-#include <stack>
-#include <string>
-#include <utility>
-
-using namespace llvm;
-
-// Are we enabling early termination?
-static cl::opt<bool> ProcessEarlyTermination(
- "path-profile-early-termination", cl::Hidden,
- cl::desc("In path profiling, insert extra instrumentation to account for "
- "unexpected function termination."));
-
-// Returns the basic block for the BallLarusNode
-BasicBlock* BallLarusNode::getBlock() {
- return(_basicBlock);
-}
-
-// Returns the number of paths to the exit starting at the node.
-unsigned BallLarusNode::getNumberPaths() {
- return(_numberPaths);
-}
-
-// Sets the number of paths to the exit starting at the node.
-void BallLarusNode::setNumberPaths(unsigned numberPaths) {
- _numberPaths = numberPaths;
-}
-
-// Gets the NodeColor used in graph algorithms.
-BallLarusNode::NodeColor BallLarusNode::getColor() {
- return(_color);
-}
-
-// Sets the NodeColor used in graph algorithms.
-void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
- _color = color;
-}
-
-// Returns an iterator over predecessor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::predBegin() {
- return(_predEdges.begin());
-}
-
-// Returns the end sentinel for the predecessor iterator.
-BLEdgeIterator BallLarusNode::predEnd() {
- return(_predEdges.end());
-}
-
-// Returns the number of predecessor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberPredEdges() {
- return(_predEdges.size());
-}
-
-// Returns an iterator over successor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::succBegin() {
- return(_succEdges.begin());
-}
-
-// Returns the end sentinel for the successor iterator.
-BLEdgeIterator BallLarusNode::succEnd() {
- return(_succEdges.end());
-}
-
-// Returns the number of successor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberSuccEdges() {
- return(_succEdges.size());
-}
-
-// Add an edge to the predecessor list.
-void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
- _predEdges.push_back(edge);
-}
-
-// Remove an edge from the predecessor list.
-void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
- removeEdge(_predEdges, edge);
-}
-
-// Add an edge to the successor list.
-void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
- _succEdges.push_back(edge);
-}
-
-// Remove an edge from the successor list.
-void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
- removeEdge(_succEdges, edge);
-}
-
-// Returns the name of the BasicBlock being represented. If BasicBlock
-// is null then returns "<null>". If BasicBlock has no name, then
-// "<unnamed>" is returned. Intended for use with debug output.
-std::string BallLarusNode::getName() {
- std::stringstream name;
-
- if(getBlock() != NULL) {
- if(getBlock()->hasName()) {
- std::string tempName(getBlock()->getName());
- name << tempName.c_str() << " (" << _uid << ")";
- } else
- name << "<unnamed> (" << _uid << ")";
- } else
- name << "<null> (" << _uid << ")";
-
- return name.str();
-}
-
-// Removes an edge from an edgeVector. Used by removePredEdge and
-// removeSuccEdge.
-void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
- // TODO: Avoid linear scan by using a set instead
- for(BLEdgeIterator i = v.begin(),
- end = v.end();
- i != end;
- ++i) {
- if((*i) == e) {
- v.erase(i);
- break;
- }
- }
-}
-
-// Returns the source node of this edge.
-BallLarusNode* BallLarusEdge::getSource() const {
- return(_source);
-}
-
-// Returns the target node of this edge.
-BallLarusNode* BallLarusEdge::getTarget() const {
- return(_target);
-}
-
-// Returns the type of the edge.
-BallLarusEdge::EdgeType BallLarusEdge::getType() const {
- return _edgeType;
-}
-
-// Sets the type of the edge.
-void BallLarusEdge::setType(EdgeType type) {
- _edgeType = type;
-}
-
-// Returns the weight of this edge. Used to decode path numbers to sequences
-// of basic blocks.
-unsigned BallLarusEdge::getWeight() {
- return(_weight);
-}
-
-// Sets the weight of the edge. Used during path numbering.
-void BallLarusEdge::setWeight(unsigned weight) {
- _weight = weight;
-}
-
-// Gets the phony edge originating at the root.
-BallLarusEdge* BallLarusEdge::getPhonyRoot() {
- return _phonyRoot;
-}
-
-// Sets the phony edge originating at the root.
-void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
- _phonyRoot = phonyRoot;
-}
-
-// Gets the phony edge terminating at the exit.
-BallLarusEdge* BallLarusEdge::getPhonyExit() {
- return _phonyExit;
-}
-
-// Sets the phony edge terminating at the exit.
-void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
- _phonyExit = phonyExit;
-}
-
-// Gets the associated real edge if this is a phony edge.
-BallLarusEdge* BallLarusEdge::getRealEdge() {
- return _realEdge;
-}
-
-// Sets the associated real edge if this is a phony edge.
-void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
- _realEdge = realEdge;
-}
-
-// Returns the duplicate number of the edge.
-unsigned BallLarusEdge::getDuplicateNumber() {
- return(_duplicateNumber);
-}
-
-// Initialization that requires virtual functions which are not fully
-// functional in the constructor.
-void BallLarusDag::init() {
- BLBlockNodeMap inDag;
- std::stack<BallLarusNode*> dfsStack;
-
- _root = addNode(&(_function.getEntryBlock()));
- _exit = addNode(NULL);
-
- // start search from root
- dfsStack.push(getRoot());
-
- // dfs to add each bb into the dag
- while(dfsStack.size())
- buildNode(inDag, dfsStack);
-
- // put in the final edge
- addEdge(getExit(),getRoot(),0);
-}
-
-// Frees all memory associated with the DAG.
-BallLarusDag::~BallLarusDag() {
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
- ++edge)
- delete (*edge);
-
- for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
- ++node)
- delete (*node);
-}
-
-// Calculate the path numbers by assigning edge increments as prescribed
-// in Ball-Larus path profiling.
-void BallLarusDag::calculatePathNumbers() {
- BallLarusNode* node;
- std::queue<BallLarusNode*> bfsQueue;
- bfsQueue.push(getExit());
-
- while(bfsQueue.size() > 0) {
- node = bfsQueue.front();
-
- DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
-
- bfsQueue.pop();
- unsigned prevPathNumber = node->getNumberPaths();
- calculatePathNumbersFrom(node);
-
- // Check for DAG splitting
- if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
- // Add new phony edge from the split-node to the DAG's exit
- BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
- exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
-
- // Counters to handle the possibility of a multi-graph
- BasicBlock* oldTarget = 0;
- unsigned duplicateNumber = 0;
-
- // Iterate through each successor edge, adding phony edges
- for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
-
- if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
- // is this edge a duplicate?
- if( oldTarget != (*succ)->getTarget()->getBlock() )
- duplicateNumber = 0;
-
- // create the new phony edge: root -> succ
- BallLarusEdge* rootEdge =
- addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
- rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
- rootEdge->setRealEdge(*succ);
-
- // split on this edge and reference its exit/root phony edges
- (*succ)->setType(BallLarusEdge::SPLITEDGE);
- (*succ)->setPhonyRoot(rootEdge);
- (*succ)->setPhonyExit(exitEdge);
- (*succ)->setWeight(0);
- }
- }
-
- calculatePathNumbersFrom(node);
- }
-
- DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
- << node->getNumberPaths() << ".\n");
-
- if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
- DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
- for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
- pred != end; pred++) {
- if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
- (*pred)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- BallLarusNode* nextNode = (*pred)->getSource();
- // not yet visited?
- if(nextNode->getNumberPaths() == 0)
- bfsQueue.push(nextNode);
- }
- }
- }
-
- DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
-}
-
-// Returns the number of paths for the Dag.
-unsigned BallLarusDag::getNumberOfPaths() {
- return(getRoot()->getNumberPaths());
-}
-
-// Returns the root (i.e. entry) node for the DAG.
-BallLarusNode* BallLarusDag::getRoot() {
- return _root;
-}
-
-// Returns the exit node for the DAG.
-BallLarusNode* BallLarusDag::getExit() {
- return _exit;
-}
-
-// Returns the function for the DAG.
-Function& BallLarusDag::getFunction() {
- return(_function);
-}
-
-// Clears the node colors.
-void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
- for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
- (*nodeIt)->setColor(color);
-}
-
-// Processes one node and its immediate edges for building the DAG.
-void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
- BallLarusNode* currentNode = dfsStack.top();
- BasicBlock* currentBlock = currentNode->getBlock();
-
- if(currentNode->getColor() != BallLarusNode::WHITE) {
- // we have already visited this node
- dfsStack.pop();
- currentNode->setColor(BallLarusNode::BLACK);
- } else {
- // are there any external procedure calls?
- if( ProcessEarlyTermination ) {
- for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
- bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
- bbCurrent++ ) {
- Instruction& instr = *bbCurrent;
- if( instr.getOpcode() == Instruction::Call ) {
- BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
- callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
- break;
- }
- }
- }
-
- TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
- isa<ResumeInst>(terminator))
- addEdge(currentNode, getExit(),0);
-
- currentNode->setColor(BallLarusNode::GRAY);
- inDag[currentBlock] = currentNode;
-
- BasicBlock* oldSuccessor = 0;
- unsigned duplicateNumber = 0;
-
- // iterate through this node's successors
- for(succ_iterator successor = succ_begin(currentBlock),
- succEnd = succ_end(currentBlock); successor != succEnd;
- oldSuccessor = *successor, ++successor ) {
- BasicBlock* succBB = *successor;
-
- // is this edge a duplicate?
- if (oldSuccessor == succBB)
- duplicateNumber++;
- else
- duplicateNumber = 0;
-
- buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
- }
- }
-}
-
-// Process an edge in the CFG for DAG building.
-void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
- dfsStack, BallLarusNode* currentNode,
- BasicBlock* succBB, unsigned duplicateCount) {
- BallLarusNode* succNode = inDag[succBB];
-
- if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
- // visited node and forward edge
- addEdge(currentNode, succNode, duplicateCount);
- } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
- // visited node and back edge
- DEBUG(dbgs() << "Backedge detected.\n");
- addBackedge(currentNode, succNode, duplicateCount);
- } else {
- BallLarusNode* childNode;
- // not visited node and forward edge
- if(succNode) // an unvisited node that is a child of a gray node
- childNode = succNode;
- else { // an unvisited node that is a child of an unvisited node
- childNode = addNode(succBB);
- inDag[succBB] = childNode;
- }
- addEdge(currentNode, childNode, duplicateCount);
- dfsStack.push(childNode);
- }
-}
-
-// The weight on each edge is the increment required along any path that
-// contains that edge.
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
- if(node == getExit())
- // The Exit node must be the base case
- node->setNumberPaths(1);
- else {
- unsigned sumPaths = 0;
- BallLarusNode* succNode;
-
- for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; succ++) {
- if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
- (*succ)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- (*succ)->setWeight(sumPaths);
- succNode = (*succ)->getTarget();
-
- if( !succNode->getNumberPaths() )
- return;
- sumPaths += succNode->getNumberPaths();
- }
-
- node->setNumberPaths(sumPaths);
- }
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
- return( new BallLarusNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- return( new BallLarusEdge(source, target, duplicateCount) );
-}
-
-// Proxy to node's constructor. Updates the DAG state.
-BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
- BallLarusNode* newNode = createNode(BB);
- _nodes.push_back(newNode);
- return( newNode );
-}
-
-// Proxy to edge's constructor. Updates the DAG state.
-BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
- _edges.push_back(newEdge);
- source->addSuccEdge(newEdge);
- target->addPredEdge(newEdge);
- return(newEdge);
-}
-
-// Adds a backedge with its phony edges. Updates the DAG state.
-void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
- childEdge->setType(BallLarusEdge::BACKEDGE);
-
- childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
- childEdge->setPhonyExit(addEdge(source, getExit(),0));
-
- childEdge->getPhonyRoot()->setRealEdge(childEdge);
- childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
-
- childEdge->getPhonyExit()->setRealEdge(childEdge);
- childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
- _backEdges.push_back(childEdge);
-}
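
For reference, the numbering scheme this deleted file implemented is small
enough to demonstrate directly: visit nodes in reverse topological order, give
the exit a single path, and assign each outgoing edge the running sum of its
successors' path counts. A standalone sketch on a diamond-shaped DAG
(Entry -> {B, C} -> Exit), mirroring calculatePathNumbersFrom() above:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    struct Edge { std::string To; unsigned Weight = 0; };

    int main() {
      // Diamond DAG: Entry branches to B and C, both fall through to Exit.
      std::map<std::string, std::vector<Edge>> Succs = {
          {"Entry", {{"B"}, {"C"}}}, {"B", {{"Exit"}}},
          {"C", {{"Exit"}}},         {"Exit", {}}};
      std::map<std::string, unsigned> NumPaths;

      // Reverse topological order; mirrors calculatePathNumbersFrom().
      for (const char *N : {"Exit", "C", "B", "Entry"}) {
        if (Succs[N].empty()) { NumPaths[N] = 1; continue; } // exit: base case
        unsigned Sum = 0;
        for (Edge &E : Succs[N]) {
          E.Weight = Sum;           // increment for any path using this edge
          Sum += NumPaths[E.To];
        }
        NumPaths[N] = Sum;
      }

      assert(NumPaths["Entry"] == 2);         // two distinct paths
      assert(Succs["Entry"][0].Weight == 0);  // Entry->B contributes 0
      assert(Succs["Entry"][1].Weight == 1);  // Entry->C contributes 1
      return 0;
    }
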
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
deleted file mode 100644
index bc53221..0000000
--- a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
+++ /dev/null
@@ -1,433 +0,0 @@
-//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface used by optimizers to load path profiles,
-// and provides a loader pass which reads a path profile file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-info"
-
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-
-using namespace llvm;
-
-// command line option for loading path profiles
-static cl::opt<std::string>
-PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
-
-namespace {
- class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
- public:
- PathProfileLoaderPass() : ModulePass(ID) { }
- ~PathProfileLoaderPass();
-
- // this pass doesn't change anything (only loads information)
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- // the full name of the loader pass
- virtual const char* getPassName() const {
- return "Path Profiling Information Loader";
- }
-
- // required since this pass implements multiple inheritance
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- // entry point to run the pass
- bool runOnModule(Module &M);
-
- // pass identification
- static char ID;
-
- private:
- // make a reference table to refer to functions by number
- void buildFunctionRefs(Module &M);
-
- // process argument info of a program from the input file
- void handleArgumentInfo();
-
- // process path number information from the input file
- void handlePathInfo();
-
- // array of references to the functions in the module
- std::vector<Function*> _functions;
-
- // path profile file handle
- FILE* _file;
-
- // path profile file name
- std::string _filename;
- };
-}
-
-// register PathLoader
-char PathProfileLoaderPass::ID = 0;
-
-INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
- NoPathProfileInfo)
-INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
- "path-profile-loader",
- "Load path profile information from file",
- false, true, false)
-
-char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
-
-// link PathLoader as a pass, and make it available as an optimisation
-ModulePass *llvm::createPathProfileLoaderPass() {
- return new PathProfileLoaderPass;
-}
-
-// ----------------------------------------------------------------------------
-// PathEdge implementation
-//
-ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
- unsigned duplicateNumber)
- : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
-
-// ----------------------------------------------------------------------------
-// Path implementation
-//
-
-ProfilePath::ProfilePath (unsigned int number, unsigned int count,
- double countStdDev, PathProfileInfo* ppi)
- : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
-
-double ProfilePath::getFrequency() const {
- return 100 * double(_count) /
- double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
-}
-
-static BallLarusEdge* getNextEdge (BallLarusNode* node,
- unsigned int pathNumber) {
- BallLarusEdge* best = 0;
-
- for( BLEdgeIterator next = node->succBegin(),
- end = node->succEnd(); next != end; next++ ) {
- if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
- (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
- (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
- (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
- best = *next;
- }
-
- return best;
-}
-
-ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
-
- increment -= next->getWeight();
-
- if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
- next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
- next->getTarget() != _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getSource()->getBlock(),
- next->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
- next->getTarget() == _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
- next->getSource() == _ppi->_currentDag->getRoot() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pev;
-}
-
-ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
- increment -= next->getWeight();
-
- // add block to the block list if it is a real edge
- if( next->getType() == BallLarusEdge::NORMAL)
- pbv->push_back (currentNode->getBlock());
- // make the back edge the last edge since we are at the end
- else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
- pbv->push_back (currentNode->getBlock());
- pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
- }
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pbv;
-}
-
-BasicBlock* ProfilePath::getFirstBlockInPath() const {
- BallLarusNode* root = _ppi->_currentDag->getRoot();
- BallLarusEdge* edge = getNextEdge(root, _number);
-
- if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
- return edge->getTarget()->getBlock();
-
- return root->getBlock();
-}
-
-// ----------------------------------------------------------------------------
-// PathProfileInfo implementation
-//
-
-// Pass identification
-char llvm::PathProfileInfo::ID = 0;
-
-PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
-}
-
-PathProfileInfo::~PathProfileInfo() {
- if (_currentDag)
- delete _currentDag;
-}
-
-// set the function for which paths are currently being processed
-void PathProfileInfo::setCurrentFunction(Function* F) {
- // Make sure it exists
- if (!F) return;
-
- if (_currentDag)
- delete _currentDag;
-
- _currentFunction = F;
- _currentDag = new BallLarusDag(*F);
- _currentDag->init();
- _currentDag->calculatePathNumbers();
-}
-
-// get the function for which paths are currently being processed
-Function* PathProfileInfo::getCurrentFunction() const {
- return _currentFunction;
-}
-
-// get the entry block of the function
-BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
- return _currentDag->getRoot()->getBlock();
-}
-
-// return the path based on its number
-ProfilePath* PathProfileInfo::getPath(unsigned int number) {
- return _functionPaths[_currentFunction][number];
-}
-
-// return the number of paths which a function may potentially execute
-unsigned int PathProfileInfo::getPotentialPathCount() {
- return _currentDag ? _currentDag->getNumberOfPaths() : 0;
-}
-
-// return an iterator for the beginning of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathBegin() {
- return _functionPaths[_currentFunction].begin();
-}
-
-// return an iterator for the end of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathEnd() {
- return _functionPaths[_currentFunction].end();
-}
-
-// returns the total number of paths run in the function
-unsigned int PathProfileInfo::pathsRun() {
- return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
-}
-
-// ----------------------------------------------------------------------------
-// PathLoader implementation
-//
-
-// remove all generated paths
-PathProfileLoaderPass::~PathProfileLoaderPass() {
- for( FunctionPathIterator funcNext = _functionPaths.begin(),
- funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
- for( ProfilePathIterator pathNext = funcNext->second.begin(),
- pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
- delete pathNext->second;
-}
-
-// entry point of the pass; this loads and parses a file
-bool PathProfileLoaderPass::runOnModule(Module &M) {
- // get the filename and setup the module's function references
- _filename = PathProfileInfoFilename;
- buildFunctionRefs (M);
-
- if (!(_file = fopen(_filename.c_str(), "rb"))) {
- errs () << "error: input '" << _filename << "' file does not exist.\n";
- return false;
- }
-
- ProfilingType profType;
-
- while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
- switch (profType) {
- case ArgumentInfo:
- handleArgumentInfo ();
- break;
- case PathInfo:
- handlePathInfo ();
- break;
- default:
- errs () << "error: bad path profiling file syntax, " << profType << "\n";
- fclose (_file);
- return false;
- }
- }
-
- fclose (_file);
-
- return true;
-}
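-// Editorial sketch (not from the deleted file): the dump parsed above is a
-// flat stream of packets, each introduced by a ProfilingType tag word. A
-// minimal standalone reader with the same shape, assuming the same on-disk
-// tag values, would be:
-//
-//   FILE *f = fopen("llvmprof.out", "rb");
-//   unsigned tag;
-//   while (f && fread(&tag, sizeof(unsigned), 1, f) == 1) {
-//     switch (tag) {
-//     case ArgumentInfo: /* length-prefixed, 4-byte-padded string */ break;
-//     case PathInfo:     /* per-function path tables */              break;
-//     default: fclose(f); f = 0; /* unknown tag: stop */             break;
-//     }
-//   }
-//   if (f) fclose(f);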
-
-// create a reference table for functions defined in the path profile file
-void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
- _functions.push_back(0); // make the 0 index a null pointer
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
- _functions.push_back(F);
- }
-}
-
-// handle command line argument info in the output file
-void PathProfileLoaderPass::handleArgumentInfo() {
- // get the argument list's length
- unsigned savedArgsLength;
- if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
- errs() << "warning: argument info header/data mismatch\n";
- return;
- }
-
- // allocate a buffer, and get the arguments
- char* args = new char[savedArgsLength+1];
- if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
- errs() << "warning: argument info header/data mismatch\n";
-
- args[savedArgsLength] = '\0';
- argList = std::string(args);
- delete [] args; // cleanup dynamic string
-
- // byte alignment
- if (savedArgsLength & 3)
- fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
-}
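-// Editorial sketch (helper not present in the deleted file): the writer pads
-// the argument string to a 4-byte boundary, which is why the reader above
-// skips 4 - (len & 3) bytes whenever len is not a multiple of four.
-static inline size_t paddedTo4(size_t len) {
-  return (len + 3) & ~size_t(3); // round up to the next multiple of 4
-}
-// e.g. paddedTo4(5) == 8, so 8 - 5 == 3 pad bytes must be skipped, matching
-// the fseek(_file, 4 - (savedArgsLength & 3), SEEK_CUR) call above.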
-
-// Handle path profile information in the output file
-void PathProfileLoaderPass::handlePathInfo () {
- // get the number of functions in this profile
- unsigned functionCount;
- if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
- errs() << "warning: path info header/data mismatch\n";
- return;
- }
-
- // gather path information for each function
- for (unsigned i = 0; i < functionCount; i++) {
- PathProfileHeader pathHeader;
- if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
- errs() << "warning: bad header for path function info\n";
- break;
- }
-
- Function* f = _functions[pathHeader.fnNumber];
-
- // dynamically allocate a table to store path numbers
- PathProfileTableEntry* pathTable =
- new PathProfileTableEntry[pathHeader.numEntries];
-
- if( fread(pathTable, sizeof(PathProfileTableEntry),
- pathHeader.numEntries, _file) != pathHeader.numEntries) {
- delete [] pathTable;
- errs() << "warning: path function info header/data mismatch\n";
- return;
- }
-
- // Build a new path for the current function
- unsigned int totalPaths = 0;
- for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
- totalPaths += pathTable[j].pathCounter;
- _functionPaths[f][pathTable[j].pathNumber]
- = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
- 0, this);
- }
-
- _functionPathCounts[f] = totalPaths;
-
- delete [] pathTable;
- }
-}
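-// Editorial sketch (illustrative; the real struct definitions live in
-// llvm/Analysis/ProfileInfoTypes.h): judging from the reads above, one
-// PathInfo packet is laid out as
-//
-//   unsigned functionCount;
-//   repeated functionCount times:
-//     PathProfileHeader     header;                   // fnNumber, numEntries
-//     PathProfileTableEntry table[header.numEntries]; // pathNumber, counter
-//
-// and _functionPathCounts[f] ends up holding the sum of the pathCounter
-// fields for function f.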
-
-//===----------------------------------------------------------------------===//
-// NoProfile PathProfileInfo implementation
-//
-
-namespace {
- struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoPathProfileInfo() : ImmutablePass(ID) {
- initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoPathProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoPathProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
- "No Path Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
deleted file mode 100644
index 48d7d05..0000000
--- a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This verifier derives an edge profile file from current path profile
-// information
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-verifier"
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
-
-using namespace llvm;
-
-namespace {
- class PathProfileVerifier : public ModulePass {
- private:
- bool runOnModule(Module &M);
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PathProfileVerifier() : ModulePass(ID) {
- initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
- }
-
-
- virtual const char *getPassName() const {
- return "Path Profiler Verifier";
- }
-
- // The verifier requires the path profile and edge profile.
- virtual void getAnalysisUsage(AnalysisUsage& AU) const;
- };
-}
-
-static cl::opt<std::string>
-EdgeProfileFilename("path-profile-verifier-file",
- cl::init("edgefrompath.llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Edge profile file generated by -path-profile-verifier"),
- cl::Hidden);
-
-char PathProfileVerifier::ID = 0;
-INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
- "Compare the path profile derived edge profile against the "
- "edge profile.", true, true)
-
-ModulePass *llvm::createPathProfileVerifierPass() {
- return new PathProfileVerifier();
-}
-
-// The verifier requires the path profile and edge profile.
-void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<PathProfileInfo>();
- AU.addPreserved<PathProfileInfo>();
-}
-
-typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
-typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
-typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
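-// Editorial note (sketch): arrayMap[source][target][duplicate] maps one CFG
-// edge to the index of its counter in edgeArray. The "duplicate" key
-// disambiguates terminators with several successor slots that name the same
-// target block (e.g. a switch with two cases branching to one label), which
-// would otherwise collapse into a single map entry.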
-
-// the verifier iterates through each path to accumulate the total
-// frequency of each edge
-bool PathProfileVerifier::runOnModule (Module &M) {
- PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
-
- // setup a data structure to map path edges which index an
- // array of edge counters
- NestedBlockToIndexMap arrayMap;
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
-
- unsigned duplicate = 0;
- BasicBlock* prev = 0;
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
- prev = TI->getSuccessor(s), ++s) {
- if (prev == TI->getSuccessor(s))
- duplicate++;
- else duplicate = 0;
-
- arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
- }
- }
- }
-
- std::vector<unsigned> edgeArray(i);
-
- // iterate through each path and increment the edge counters as needed
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- pathProfileInfo.setCurrentFunction(F);
-
- DEBUG(dbgs() << "function '" << F->getName() << "' ran "
- << pathProfileInfo.pathsRun()
- << "/" << pathProfileInfo.getPotentialPathCount()
- << " potential paths\n");
-
- for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
- endPath = pathProfileInfo.pathEnd();
- nextPath != endPath; nextPath++ ) {
- ProfilePath* currentPath = nextPath->second;
-
- ProfilePathEdgeVector* pev = currentPath->getPathEdges();
- DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
- << currentPath->getCount() << "\n");
-      // set up the entry edge (normally path profiling doesn't care about this)
- if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
- += currentPath->getCount();
-
- for( ProfilePathEdgeIterator nextEdge = pev->begin(),
- endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
- if (nextEdge != pev->begin())
- DEBUG(dbgs() << " :: ");
-
- BasicBlock* source = nextEdge->getSource();
- BasicBlock* target = nextEdge->getTarget();
- unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
- << "}--> " << target->getName());
-
- // Ensure all the referenced edges exist
- // TODO: make this a separate function
- if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getName() << "()]: source '"
- << source->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getName() << "()]: target '"
- << target->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getName() << "()]: edge "
- << source->getName() << " -> " << target->getName()
- << " duplicate number " << duplicateNumber
- << " does not exist in the array map.\n";
- } else {
- edgeArray[arrayMap[source][target][duplicateNumber]]
- += currentPath->getCount();
- }
- }
-
- DEBUG(errs() << "\n");
-
- delete pev;
- }
- }
-
- std::string errorInfo;
- std::string filename = EdgeProfileFilename;
-
- // Open a handle to the file
- FILE* edgeFile = fopen(filename.c_str(),"wb");
-
- if (!edgeFile) {
- errs() << "error: unable to open file '" << filename << "' for output.\n";
- return false;
- }
-
- errs() << "Generating edge profile '" << filename << "' ...\n";
-
- // write argument info
- unsigned type = ArgumentInfo;
- unsigned num = pathProfileInfo.argList.size();
- int zeros = 0;
-
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
- fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
- if (num&3)
- fwrite(&zeros, 1, 4-(num&3), edgeFile);
-
- type = EdgeInfo;
- num = edgeArray.size();
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
-
- // write each edge to the file
- for( std::vector<unsigned>::iterator s = edgeArray.begin(),
- e = edgeArray.end(); s != e; s++)
- fwrite(&*s, sizeof (unsigned), 1, edgeFile);
-
- fclose (edgeFile);
-
- return true;
-}
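-// Editorial sketch (not from the deleted file): the file written above is an
-// ArgumentInfo packet followed by an EdgeInfo packet, both length-prefixed
-// and 4-byte aligned:
-//
-//   [ArgumentInfo][argLen][argLen bytes, zero-padded to 4]
-//   [EdgeInfo][numEdges][numEdges unsigned counters]
-//
-// which is the framing the ProfileDataLoader below expects to read back.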
diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp
deleted file mode 100644
index d7f444b..0000000
--- a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileDataLoader.cpp - Load profile information from disk ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileDataLoader class is used to load raw profiling data from the dump
-// file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/ProfileDataTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
- const BasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first->getName();
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second->getName();
- else
- O << "0";
-
- return O << ")";
-}
-
-/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
-/// (or both) may not be defined.
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- // Undefined + undefined = undefined.
- if (A == ProfileDataLoader::Uncounted) return B;
- if (B == ProfileDataLoader::Uncounted) return A;
-
- return A + B;
-}
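-// Editorial example: with Uncounted == ~0U acting as a "no data" sentinel,
-//   AddCounts(Uncounted, 7)         == 7
-//   AddCounts(3, 4)                 == 7
-//   AddCounts(Uncounted, Uncounted) == Uncounted
-// i.e. the sentinel never inflates a defined count.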
-
-/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
-template <typename T>
-static void ReadProfilingData(const char *ToolName, FILE *F,
- T *Data, size_t NumEntries) {
- // Read in the block of data...
- if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
- report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
-}
-
-/// ReadProfilingNumEntries - Read how many entries are in this profiling data
-/// packet.
-static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
- bool ShouldByteSwap) {
- unsigned Entry;
- ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
- return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
-}
-
-/// ReadProfilingBlock - Read the number of entries in the next profiling data
-/// packet and then accumulate the entries into 'Data'.
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVector<unsigned, 32> &Data) {
- // Read the number of entries...
- unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the data.
- SmallVector<unsigned, 8> TempSpace(NumEntries);
- ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
-
- // Make sure we have enough space ...
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileDataLoader::Uncounted);
-
- // Accumulate the data we just read into the existing data.
- for (unsigned i = 0; i < NumEntries; ++i) {
- unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
- Data[i] = AddCounts(Entry, Data[i]);
- }
-}
-
-/// ReadProfilingArgBlock - Read the command line arguments that the program was
-/// run with when the current profiling data packet(s) were generated.
-static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVector<std::string, 1> &CommandLines) {
- // Read the number of bytes ...
- unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the arguments (if there are any to read). Round up the length to
- // the nearest 4-byte multiple.
- SmallVector<char, 8> Args(ArgLength+4);
- if (ArgLength)
- ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3);
-
- // Store the arguments.
- CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
-}
-
-const unsigned ProfileDataLoader::Uncounted = ~0U;
-
-/// ProfileDataLoader ctor - Read the specified profiling data file, reporting
-/// a fatal error if the file is invalid or broken.
-ProfileDataLoader::ProfileDataLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0)
- report_fatal_error(Twine(ToolName) + ": Error opening '" +
- Filename + "': ");
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information. This can happen when the compiler host and target have
- // different endianness.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType;
-
- switch (PacketType) {
- case ArgumentInfo:
- ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- default:
- report_fatal_error(std::string(ToolName)
- + ": Unknown profiling packet type");
- break;
- }
- }
-
- fclose(F);
-}
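-// Editorial sketch (helper not present in the deleted file): the byte-swap
-// heuristic above assumes packet tags are small non-zero enumerators, so a
-// tag written on a host of the opposite endianness shows up with its low
-// byte equal to zero:
-static inline bool looksByteSwapped(unsigned packetType) {
-  return (char)packetType == 0; // low byte empty => wrong byte order
-}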
diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
deleted file mode 100644
index 2ee0093..0000000
--- a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass loads profiling data from a dump file and sets branch weight
-// metadata.
-//
-// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
-// once ProfileInfo etc. has been removed.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-metadata-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
-
-static cl::opt<std::string>
-ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-metadata-loader"));
-
-namespace {
- /// This pass loads profiling data from a dump file and sets branch weight
- /// metadata.
- class ProfileMetadataLoaderPass : public ModulePass {
- std::string Filename;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileMetadataLoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileMetadataFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profile loader";
- }
-
- virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
- ArrayRef<unsigned>);
- virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
- virtual void setBranchWeightMetadata(Module&, ProfileData&);
-
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char ProfileMetadataLoaderPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-
-char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
-
-/// createProfileMetadataLoaderPass - This function returns a Pass that loads
-/// the profiling information for the module from the specified filename,
-/// making it available to the optimizers.
-ModulePass *llvm::createProfileMetadataLoaderPass() {
- return new ProfileMetadataLoaderPass();
-}
-ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
- return new ProfileMetadataLoaderPass(Filename);
-}
-
-/// readEdge - Take the value from a profile counter and assign it to an edge.
-void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
- ProfileData &PB, ProfileData::Edge e,
- ArrayRef<unsigned> Counters) {
- if (ReadCount >= Counters.size()) return;
-
- unsigned weight = Counters[ReadCount];
- assert(weight != ProfileDataLoader::Uncounted);
- PB.addEdgeWeight(e, weight);
-
- DEBUG(dbgs() << "-- Read Edge Counter for " << e
- << " (# "<< (ReadCount) << "): "
- << PB.getEdgeWeight(e) << "\n");
-}
-
-/// matchEdges - Link every profile counter with an edge.
-unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
- ArrayRef<unsigned> Counters) {
- if (Counters.size() == 0) return 0;
-
- unsigned ReadCount = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
- readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
- Counters);
- }
- }
- }
-
- return ReadCount;
-}
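-// Editorial note (sketch): counters are consumed in a fixed traversal order,
-// one (0, entry) edge per function followed by one (BB, successor) edge per
-// terminator successor slot, in block order. The instrumented binary must
-// enumerate edges the same way, or every weight after the first mismatch
-// lands on the wrong edge; the ReadCount != Counters.size() warning in
-// runOnModule below is the only guard against that.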
-
-/// setBranchWeightMetadata - Translate the counter values associated with each
-/// edge into branch weights for each conditional branch (a branch with 2 or
-/// more destinations).
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
- ProfileData &PB) {
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- unsigned NumSuccessors = TI->getNumSuccessors();
-
-      // If there is only one successor then we cannot set a branch
-      // probability, as the target is certain.
- if (NumSuccessors < 2) continue;
-
- // Load the weights of all edges leading from this terminator.
- DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
- << " successors:\n");
- SmallVector<uint32_t, 4> Weights(NumSuccessors);
- for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
- ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
- Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
- DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
- << Weights[s] << "\n");
- }
-
- // Set branch weight metadata. This will set branch probabilities of
- // 100%/0% if that is true of the dynamic execution.
- // BranchProbabilityInfo can account for this when it loads this metadata
-      // (it gives the unexecuted branch a weight of 1 for the purposes of
- // probability calculations).
- MDBuilder MDB(TI->getContext());
- MDNode *Node = MDB.createBranchWeights(Weights);
- TI->setMetadata(LLVMContext::MD_prof, Node);
- NumTermsAnnotated++;
- }
- }
-}
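-// Editorial sketch (LLVM 3.x API, matching the calls above): attaching
-// weights to a two-way conditional branch looks like
-//
-//   MDBuilder MDB(BI->getContext());
-//   BI->setMetadata(LLVMContext::MD_prof,
-//                   MDB.createBranchWeights(90, 10)); // taken / not taken
-//
-// and BranchProbabilityInfo later reads the !prof node back as 90%/10%.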
-
-bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
- ProfileDataLoader PDL("profile-data-loader", Filename);
- ProfileData PB;
-
- ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
-
- unsigned ReadCount = matchEdges(M, PB, Counters);
-
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
-
- setBranchWeightMetadata(M, PB);
-
- return ReadCount > 0;
-}
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
deleted file mode 100644
index b284b99..0000000
--- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete ProfileInfo implementation that estimates
-// the profiling information in a very crude and unimaginative way.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<double>
-LoopWeight(
- "profile-estimator-loop-weight", cl::init(10),
- cl::value_desc("loop-weight"),
- cl::desc("Number of loop executions used for profile-estimator")
-);
-
-namespace {
- class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
- double ExecCount;
- LoopInfo *LI;
- std::set<BasicBlock*> BBToVisit;
- std::map<Loop*,double> LoopExitWeights;
- std::map<Edge,double> MinimalWeight;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(ID), ExecCount(execcount) {
- initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
- if (execcount == 0) ExecCount = LoopWeight;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information estimator";
- }
-
-    /// run - Estimate the profile information for the specified function.
- virtual bool runOnFunction(Function &F);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual void recurseBasicBlock(BasicBlock *BB);
-
- void inline printEdgeWeight(Edge);
- };
-} // End of anonymous namespace
-
-char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-
-namespace llvm {
- char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
-
- FunctionPass *createProfileEstimatorPass() {
- return new ProfileEstimatorPass();
- }
-
- /// createProfileEstimatorPass - This function returns a Pass that estimates
- /// profiling information using the given loop execution count.
- Pass *createProfileEstimatorPass(const unsigned execcount) {
- return new ProfileEstimatorPass(execcount);
- }
-}
-
-static double ignoreMissing(double w) {
- if (w == ProfileInfo::MissingValue) return 0;
- return w;
-}
-
-static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
- DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
-}
-
-void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
- DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
- << format("%20.20g", getEdgeWeight(E)) << "\n");
-}
-
-// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
-// single block and then recurses into the successors.
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weights of the incoming edges must equal the block weight, which must in
-// turn equal the sum of the weights of the outgoing edges.
-// Since the flow of a block is determined from the current state of the
-// flow, once an edge has a flow assigned this flow is never changed again;
-// otherwise it would be possible to violate the flow condition in another
-// block.
-void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
-
- // Break the recursion if this BasicBlock was already visited.
- if (BBToVisit.find(BB) == BBToVisit.end()) return;
-
- // Read the LoopInfo for this block.
- bool BBisHeader = LI->isLoopHeader(BB);
- Loop* BBLoop = LI->getLoopFor(BB);
-
- // To get the block weight, read all incoming edges.
- double BBWeight = 0;
- std::set<BasicBlock*> ProcessedPreds;
- for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- // If this block was not considered already, add weight.
- Edge edge = getEdge(*bbi,BB);
- double w = getEdgeWeight(edge);
- if (ProcessedPreds.insert(*bbi).second) {
- BBWeight += ignoreMissing(w);
- }
-    // If this block is a loop header and the predecessor is contained in this
-    // loop, the edge is a backedge; continue and do not check whether the
-    // value is valid.
- if (BBisHeader && BBLoop->contains(*bbi)) {
- printEdgeError(edge, "but is backedge, continuing");
- continue;
- }
-    // If the edge's value is missing (and this is neither a loop header nor a
-    // backedge), return; this block cannot be estimated yet.
- if (w == MissingValue) {
- printEdgeError(edge, "returning");
- return;
- }
- }
- if (getExecutionCount(BB) != MissingValue) {
- BBWeight = getExecutionCount(BB);
- }
-
- // Fetch all necessary information for current block.
- SmallVector<Edge, 8> ExitEdges;
- SmallVector<Edge, 8> Edges;
- if (BBLoop) {
- BBLoop->getExitEdges(ExitEdges);
- }
-
-  // If this is a loop header, consider the following:
-  // Exactly the flow that enters this block must exit this block too. So do
-  // the following:
-  // *) get all the exit edges, read the flow that is already leaving this
-  // loop, remember the edges that do not have any flow on them right now.
-  // (The edges that already have flow on them are most likely exiting edges
-  // of other loops; do not touch those flows because the previously
-  // calculated loop headers would not be exact anymore.)
- // *) In case there is not a single exiting edge left, create one at the loop
- // latch to prevent the flow from building up in the loop.
- // *) Take the flow that is not leaving the loop already and distribute it on
- // the remaining exiting edges.
- // (This ensures that all flow that enters the loop also leaves it.)
- // *) Increase the flow into the loop by increasing the weight of this block.
- // There is at least one incoming backedge that will bring us this flow later
- // on. (So that the flow condition in this node is valid again.)
- if (BBisHeader) {
- double incoming = BBWeight;
- // Subtract the flow leaving the loop.
- std::set<Edge> ProcessedExits;
- for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
- ee = ExitEdges.end(); ei != ee; ++ei) {
- if (ProcessedExits.insert(*ei).second) {
- double w = getEdgeWeight(*ei);
- if (w == MissingValue) {
- Edges.push_back(*ei);
-          // Check whether a minimal weight is necessary; if so, subtract it
-          // from the incoming weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- incoming -= MinimalWeight[*ei];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- } else {
- incoming -= w;
- }
- }
- }
- // If no exit edges, create one:
- if (Edges.size() == 0) {
- BasicBlock *Latch = BBLoop->getLoopLatch();
- if (Latch) {
- Edge edge = getEdge(Latch,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- edge = getEdge(Latch, BB);
- EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
- printEdgeWeight(edge);
- }
- }
-
-    // Distribute the remaining weight to the exiting edges. To prevent
-    // fractions from building up and provoking precision problems, the weight
-    // to be distributed is split and rounded; the last edge gets a somewhat
-    // bigger value, but we are close enough for an estimation.
- double fraction = floor(incoming/Edges.size());
- for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- double w = 0;
- if (ei != (ee-1)) {
- w = fraction;
- incoming -= fraction;
- } else {
- w = incoming;
- }
- EdgeInformation[BB->getParent()][*ei] += w;
-      // Re-add the necessary minimal weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
-
-      // Add minimal weight to the paths to all exit edges; this is used to
-      // ensure that enough flow reaches these edges.
- Path p;
- const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
- while (Dest != BB) {
- const BasicBlock *Parent = p.find(Dest)->second;
- Edge e = getEdge(Parent, Dest);
- if (MinimalWeight.find(e) == MinimalWeight.end()) {
- MinimalWeight[e] = 0;
- }
- MinimalWeight[e] += w;
- DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
- Dest = Parent;
- }
- }
- // Increase flow into the loop.
- BBWeight *= (ExecCount+1);
- }
-
- BlockInformation[BB->getParent()][BB] = BBWeight;
-  // Up until now we considered only the loop exiting edges; now we have a
-  // definite block weight and must distribute this onto the outgoing edges.
-  // Since there may already be flow attached to some of the edges, read this
-  // flow first and remember the edges that still have no flow attached.
- Edges.clear();
- std::set<BasicBlock*> ProcessedSuccs;
-
- succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- // Also check for (BB,0) edges that may already contain some flow. (But only
- // in case there are no successors.)
- if (bbi == bbe) {
- Edge edge = getEdge(BB,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- }
- for ( ; bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- Edge edge = getEdge(BB,*bbi);
- double w = getEdgeWeight(edge);
- if (w != MissingValue) {
- BBWeight -= getEdgeWeight(edge);
- } else {
- Edges.push_back(edge);
-        // If a minimal weight is necessary, reserve weight by subtracting it
-        // from the block weight; it is re-added later on.
- if (MinimalWeight.find(edge) != MinimalWeight.end()) {
- BBWeight -= MinimalWeight[edge];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
- }
- }
- }
- }
-
- double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
- // Finally we know what flow is still not leaving the block, distribute this
- // flow onto the empty edges.
- for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- if (ei != (ee-1)) {
- EdgeInformation[BB->getParent()][*ei] += fraction;
- BBWeight -= fraction;
- } else {
- EdgeInformation[BB->getParent()][*ei] += BBWeight;
- }
-    // Re-add the minimal necessary weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
- }
-
- // This block is visited, mark this before the recursion.
- BBToVisit.erase(BB);
-
- // Recurse into successors.
- for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-}
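-// Editorial note (sketch): the invariant maintained above is flow
-// conservation per block,
-//
-//   sum(incoming edge weights) == block weight == sum(outgoing edge weights),
-//
-// with loop headers scaled by (ExecCount + 1) so that the back edge can
-// later carry the extra ExecCount iterations of flow back into the header.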
-
-bool ProfileEstimatorPass::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Fetch LoopInfo and clear ProfileInfo for this function.
- LI = &getAnalysis<LoopInfo>();
- FunctionInformation.erase(&F);
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- BBToVisit.clear();
-
- // Mark all blocks as to visit.
- for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
- BBToVisit.insert(bi);
-
- // Clear Minimal Edges.
- MinimalWeight.clear();
-
- DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
-
-  // Since the entry block is the first one and has no predecessors, the edge
-  // (0,entry) is inserted with a large starting weight (2^32 here).
- BasicBlock *entry = &F.getEntryBlock();
- BlockInformation[&F][entry] = pow(2.0, 32.0);
- Edge edge = getEdge(0,entry);
- EdgeInformation[&F][edge] = BlockInformation[&F][entry];
- printEdgeWeight(edge);
-
-  // Since recurseBasicBlock() may return with some blocks not fully
-  // estimated, call it repeatedly until everything is calculated.
- bool cleanup = false;
- recurseBasicBlock(entry);
- while (BBToVisit.size() > 0 && !cleanup) {
-    // Remember the number of open blocks; this is later used to check whether
-    // progress was made.
- unsigned size = BBToVisit.size();
-
- // Try to calculate all blocks in turn.
- for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
- be = BBToVisit.end(); bi != be; ++bi) {
- recurseBasicBlock(*bi);
- // If at least one block was finished, break because iterator may be
- // invalid.
- if (BBToVisit.size() < size) break;
- }
-
- // If there was not a single block resolved, make some assumptions.
- if (BBToVisit.size() == size) {
- bool found = false;
- for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
- (BBI != BBE) && (!found); ++BBI) {
- BasicBlock *BB = *BBI;
-        // Try each predecessor to see whether it can be assumed.
- for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- (bbi != bbe) && (!found); ++bbi) {
- Edge e = getEdge(*bbi,BB);
- double w = getEdgeWeight(e);
- // Check that edge from predecessor is still free.
- if (w == MissingValue) {
-            // Check if there is a cycle from this block to the predecessor.
- Path P;
- const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
- if (Dest != *bbi) {
-              // If there is no cycle, just set the edge weight to 0
- EdgeInformation[&F][e] = 0;
- DEBUG(dbgs() << "Assuming edge weight: ");
- printEdgeWeight(e);
- found = true;
- }
- }
- }
- }
- if (!found) {
- cleanup = true;
- DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
- }
- }
- }
-  // In case there was no safe way to assume edges, as a last measure set
-  // _everything_ to zero.
- if (cleanup) {
- FunctionInformation[&F] = 0;
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- const BasicBlock *BB = &(*FI);
- BlockInformation[&F][BB] = 0;
- const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
- if (predi == prede) {
- Edge e = getEdge(0,BB);
- setEdgeWeight(e,0);
- }
- for (;predi != prede; ++predi) {
- Edge e = getEdge(*predi,BB);
- setEdgeWeight(e,0);
- }
- succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
- if (succi == succe) {
- Edge e = getEdge(BB,0);
- setEdgeWeight(e,0);
- }
- for (;succi != succe; ++succi) {
-        Edge e = getEdge(BB,*succi); // edge from BB to its successor
- setEdgeWeight(e,0);
- }
- }
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
deleted file mode 100644
index 9626a48..0000000
--- a/contrib/llvm/lib/Analysis/ProfileInfo.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the abstract ProfileInfo interface, and the default
-// "no profile" implementation.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <limits>
-#include <queue>
-#include <set>
-using namespace llvm;
-
-namespace llvm {
- template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
-}
-
-// Register the ProfileInfo interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
-
-namespace llvm {
-
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
-
-template <>
-ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
- MachineProfile = 0;
-}
-template <>
-ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
- if (MachineProfile) delete MachineProfile;
-}
-
-template<>
-char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
-
-template<>
-const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
-
-template<> const
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
-
-template<> double
-ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(BB);
- if (I != J->second.end())
- return I->second;
- }
-
- double Count = MissingValue;
-
- const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- // Are there zero predecessors of this block?
- if (PI == PE) {
- Edge e = getEdge(0, BB);
- Count = getEdgeWeight(e);
- } else {
- // Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges.
- std::set<const BasicBlock*> ProcessedPreds;
- Count = 0;
- for (; PI != PE; ++PI) {
- const BasicBlock *P = *PI;
- if (ProcessedPreds.insert(P).second) {
- double w = getEdgeWeight(getEdge(P, BB));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- // If the predecessors did not suffice to get block weight, try successors.
- if (Count == MissingValue) {
-
- succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
-
- // Are there zero successors of this block?
- if (SI == SE) {
- Edge e = getEdge(BB,0);
- Count = getEdgeWeight(e);
- } else {
- std::set<const BasicBlock*> ProcessedSuccs;
- Count = 0;
- for (; SI != SE; ++SI)
- if (ProcessedSuccs.insert(*SI).second) {
- double w = getEdgeWeight(getEdge(BB, *SI));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
- return Count;
-}
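-// Editorial note (sketch): the lookup above is lazy. A block count is
-// derived on demand as the sum of its incoming edge weights (falling back to
-// the outgoing edges), then memoized in BlockInformation, so repeated
-// queries for the same block are a single map lookup.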
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineBasicBlock *MBB) {
- std::map<const MachineFunction*, BlockCounts>::iterator J =
- BlockInformation.find(MBB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(MBB);
- if (I != J->second.end())
- return I->second;
- }
-
- return MissingValue;
-}
-
-template<>
-double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
- std::map<const Function*, double>::iterator J =
- FunctionInformation.find(F);
- if (J != FunctionInformation.end())
- return J->second;
-
-  // isDeclaration() is checked here and not at the start of the function to
-  // allow functions without a body to still have an execution count.
- if (F->isDeclaration()) return MissingValue;
-
- double Count = getExecutionCount(&F->getEntryBlock());
- if (Count != MissingValue) FunctionInformation[F] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineFunction *MF) {
- std::map<const MachineFunction*, double>::iterator J =
- FunctionInformation.find(MF);
- if (J != FunctionInformation.end())
- return J->second;
-
- double Count = getExecutionCount(&MF->front());
- if (Count != MissingValue) FunctionInformation[MF] = Count;
- return Count;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- setExecutionCount(const BasicBlock *BB, double w) {
- DEBUG(dbgs() << "Creating Block " << BB->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[BB->getParent()][BB] = w;
-}
-
-template<>
-void ProfileInfoT<MachineFunction, MachineBasicBlock>::
- setExecutionCount(const MachineBasicBlock *MBB, double w) {
- DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[MBB->getParent()][MBB] = w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
- double oldw = getEdgeWeight(e);
- assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
- DEBUG(dbgs() << "Adding to Edge " << e
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- EdgeInformation[getFunction(e)][e] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- addExecutionCount(const BasicBlock *BB, double w) {
- double oldw = getExecutionCount(BB);
- assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
- DEBUG(dbgs() << "Adding to Block " << BB->getName()
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- BlockInformation[BB->getParent()][BB] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J == BlockInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
- J->second.erase(BB);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(getFunction(e));
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting" << e << "\n");
- J->second.erase(e);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceEdge(const Edge &oldedge, const Edge &newedge) {
- double w;
- if ((w = getEdgeWeight(newedge)) == MissingValue) {
- w = getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
- } else {
- w += getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
- }
- setEdgeWeight(newedge,w);
- removeEdge(oldedge);
-}
-
-template<>
-const BasicBlock *ProfileInfoT<Function,BasicBlock>::
- GetPath(const BasicBlock *Src, const BasicBlock *Dest,
- Path &P, unsigned Mode) {
- const BasicBlock *BB = 0;
- bool hasFoundPath = false;
-
- std::queue<const BasicBlock *> BFS;
- BFS.push(Src);
-
- while(BFS.size() && !hasFoundPath) {
- BB = BFS.front();
- BFS.pop();
-
- succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- if (Succ == End) {
- P[(const BasicBlock*)0] = BB;
- if (Mode & GetPathToExit) {
- hasFoundPath = true;
- BB = 0;
- }
- }
- for(;Succ != End; ++Succ) {
- if (P.find(*Succ) != P.end()) continue;
- Edge e = getEdge(BB,*Succ);
- if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
- P[*Succ] = BB;
- BFS.push(*Succ);
- if ((Mode & GetPathToDest) && *Succ == Dest) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- }
- }
-
- return BB;
-}
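-// Editorial note (sketch): GetPath is a BFS whose stopping rule is chosen by
-// the Mode bitmask, e.g.
-//
-//   GetPath(Src, Dest, P, GetPathToDest); // stop when Dest is reached
-//   GetPath(Src, 0,    P, GetPathToExit); // stop at any block with no
-//                                         // successors
-//
-// P maps each visited block to its BFS parent, so callers walk the result
-// backwards from the returned block to Src (as divertFlow does below).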
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- divertFlow(const Edge &oldedge, const Edge &newedge) {
- DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
-  // First check if the old edge was taken; if not, just delete it...
- if (getEdgeWeight(oldedge) == 0) {
- removeEdge(oldedge);
- return;
- }
-
- Path P;
- P[newedge.first] = 0;
- P[newedge.second] = newedge.first;
- const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
- double w = getEdgeWeight (oldedge);
- DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
- do {
- const BasicBlock *Parent = P.find(BB)->second;
- Edge e = getEdge(Parent,BB);
- double oldw = getEdgeWeight(e);
- double oldc = getExecutionCount(e.first);
- setEdgeWeight(e, w+oldw);
- if (Parent != oldedge.first) {
- setExecutionCount(e.first, w+oldc);
- }
- BB = Parent;
- } while (BB != newedge.first);
- removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
- DEBUG(dbgs() << "Replacing " << RmBB->getName()
- << " with " << DestBB->getName() << "\n");
- const Function *F = DestBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- Edge e, newedge;
- bool erasededge = false;
- EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
- while(I != E) {
- e = (I++)->first;
- bool foundedge = false; bool eraseedge = false;
- if (e.first == RmBB) {
- if (e.second == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(DestBB, e.second);
- foundedge = true;
- }
- }
- if (e.second == RmBB) {
- if (e.first == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(e.first, DestBB);
- foundedge = true;
- }
- }
- if (foundedge) {
- replaceEdge(e, newedge);
- }
- if (eraseedge) {
- if (erasededge) {
- Edge newedge = getEdge(DestBB, DestBB);
- replaceEdge(e, newedge);
- } else {
- removeEdge(e);
- erasededge = true;
- }
- }
- }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it is possible that there is more than one edge in the CFG from
-/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
- const BasicBlock *SecondBB,
- const BasicBlock *NewBB,
- bool MergeIdenticalEdges) {
- const Function *F = FirstBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- // Generate edges and read current weight.
- Edge e = getEdge(FirstBB, SecondBB);
- Edge n1 = getEdge(FirstBB, NewBB);
- Edge n2 = getEdge(NewBB, SecondBB);
- EdgeWeights &ECs = J->second;
- double w = ECs[e];
-
- int succ_count = 0;
- if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB; if there is more than
-    // one, only slice out a proportional part for NewBB.
- for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
- BBI != BBE; ++BBI) {
- if (*BBI == SecondBB) succ_count++;
- }
- // When the NewBB is completely new, increment the count by one so that
- // the counts are properly distributed.
- if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
- } else {
- // When the edges are merged anyway, then redirect all flow.
- succ_count = 1;
- }
-
-  // We now know how many edges there are from FirstBB to SecondBB, so reroute
-  // a proportional part of the edge weight over NewBB.
- double neww = floor(w / succ_count);
- ECs[n1] += neww;
- ECs[n2] += neww;
- BlockInformation[F][NewBB] += neww;
- if (succ_count == 1) {
- ECs.erase(e);
- } else {
- ECs[e] -= neww;
- }
-}
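-// Editorial example: with w == 10 and two parallel FirstBB->SecondBB edges
-// (succ_count == 2), neww == floor(10 / 2) == 5, so five units are rerouted
-// over FirstBB->NewBB->SecondBB and the surviving parallel edge keeps the
-// other five.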
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
- const BasicBlock* New) {
- const Function *F = Old->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
-
- std::set<Edge> Edges;
- for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
- ewi != ewe; ++ewi) {
- Edge old = ewi->first;
- if (old.first == Old) {
- Edges.insert(old);
- }
- }
- for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
- EI != EE; ++EI) {
- Edge newedge = getEdge(New, EI->second);
- replaceEdge(*EI, newedge);
- }
-
- double w = getExecutionCount(Old);
- setEdgeWeight(getEdge(Old, New), w);
- setExecutionCount(New, w);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
- const BasicBlock* NewBB,
- BasicBlock *const *Preds,
- unsigned NumPreds) {
- const Function *F = BB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
- << " to " << NewBB->getName() << "\n");
-
- // Collect weight that was redirected over NewBB.
- double newweight = 0;
-
- std::set<const BasicBlock *> ProcessedPreds;
-  // For all requested predecessors.
- for (unsigned pred = 0; pred < NumPreds; ++pred) {
- const BasicBlock * Pred = Preds[pred];
- if (ProcessedPreds.insert(Pred).second) {
- // Create edges and read old weight.
- Edge oldedge = getEdge(Pred, BB);
- Edge newedge = getEdge(Pred, NewBB);
-
- // Remember how much weight was redirected.
- newweight += getEdgeWeight(oldedge);
-
- replaceEdge(oldedge,newedge);
- }
- }
-
- Edge newedge = getEdge(NewBB,BB);
- setEdgeWeight(newedge, newweight);
- setExecutionCount(NewBB, newweight);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
- const Function *New) {
- DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
- << New->getName() << "\n");
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(Old);
- if(J != EdgeInformation.end()) {
- EdgeInformation[New] = J->second;
- }
- EdgeInformation.erase(Old);
- BlockInformation.erase(Old);
- FunctionInformation.erase(Old);
-}
-
-static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
- ProfileInfo::Edge &tocalc, unsigned &uncalc) {
- if (w == ProfileInfo::MissingValue) {
- tocalc = edge;
- uncalc++;
- return 0;
- } else {
- return w;
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::
- CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
- bool assumeEmptySelf) {
- Edge edgetocalc;
- unsigned uncalculated = 0;
-
-  // collect the weights of all incoming and outgoing edges; remember edges
-  // that have no value
- double incount = 0;
- SmallSet<const BasicBlock*,8> pred_visited;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi==bbe) {
- Edge e = getEdge(0,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- for (;bbi != bbe; ++bbi) {
- if (pred_visited.insert(*bbi)) {
- Edge e = getEdge(*bbi,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- }
-
- double outcount = 0;
- SmallSet<const BasicBlock*,8> succ_visited;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi==sbbe) {
- Edge e = getEdge(BB,0);
- if (getEdgeWeight(e) == MissingValue) {
- double w = getExecutionCount(BB);
- if (w != MissingValue) {
- setEdgeWeight(e,w);
- removed = e;
- }
- }
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- for (;sbbi != sbbe; ++sbbi) {
- if (succ_visited.insert(*sbbi)) {
- Edge e = getEdge(BB,*sbbi);
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- }
-
-  // if exactly one edge weight was missing, calculate it and remove it from
-  // the spanning tree
- if (uncalculated == 0 ) {
- return true;
- } else
- if (uncalculated == 1) {
- if (incount < outcount) {
- EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
- } else {
- EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
- }
- DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
- << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
- removed = edgetocalc;
- return true;
- } else
- if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
- setEdgeWeight(edgetocalc, incount * 10);
- removed = edgetocalc;
- return true;
- } else {
- return false;
- }
-}
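-// Editorial example: if a block has incoming edge weights {4, MissingValue}
-// and a single outgoing weight of 9, flow conservation pins the missing edge
-// at |9 - 4| == 5; that is the uncalculated == 1 case handled above.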
-
-static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
- double w = PI->getEdgeWeight(e);
- if (w != ProfileInfo::MissingValue) {
- calcw += w;
- } else {
- misscount.insert(e);
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
- double inWeight = 0;
- std::set<Edge> inMissing;
- std::set<const BasicBlock*> ProcessedPreds;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi == bbe) {
- readEdge(this,getEdge(0,BB),inWeight,inMissing);
- }
- for( ; bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
- }
- }
-
- double outWeight = 0;
- std::set<Edge> outMissing;
- std::set<const BasicBlock*> ProcessedSuccs;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi == sbbe)
- readEdge(this,getEdge(BB,0),outWeight,outMissing);
- for ( ; sbbi != sbbe; ++sbbi ) {
- if (ProcessedSuccs.insert(*sbbi).second) {
- readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
- }
- }
-
- double share;
- std::set<Edge>::iterator ei,ee;
- if (inMissing.size() == 0 && outMissing.size() > 0) {
- ei = outMissing.begin();
- ee = outMissing.end();
- share = inWeight/outMissing.size();
- setExecutionCount(BB,inWeight);
- } else
- if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
- ei = inMissing.begin();
- ee = inMissing.end();
- share = 0;
- setExecutionCount(BB,0);
- } else
- if (inMissing.size() == 0 && outMissing.size() == 0) {
- setExecutionCount(BB,outWeight);
- return true;
- } else {
- return false;
- }
- for ( ; ei != ee; ++ei ) {
- setEdgeWeight(*ei,share);
- }
- return true;
-}
-
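-// repair - Reconstruct a consistent profile for F by repeatedly applying
-// exact calculation, proportional estimation and increasingly speculative
-// heuristics until every block has been visited.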
-template<>
-void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
-// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
-// for (Function::const_iterator FI = F->begin(), FE = F->end();
-// FI != FE; ++FI) {
-// const BasicBlock* BB = &(*FI);
-// {
-// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// {
-// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// }
-// return;
-// }
- // The set of BasicBlocks that are still unvisited.
- std::set<const BasicBlock*> Unvisited;
-
- // The set of return edges (Edges with no successors).
- std::set<Edge> ReturnEdges;
- double ReturnWeight = 0;
-
- // First iterate over the whole function and collect:
- // 1) The blocks in this function in the Unvisited set.
- // 2) The return edges in the ReturnEdges set.
- // 3) The flow that is leaving the function already via return edges.
-
- // Data structure for searching the function.
- std::queue<const BasicBlock *> BFS;
- const BasicBlock *BB = &(F->getEntryBlock());
- BFS.push(BB);
- Unvisited.insert(BB);
-
- while (BFS.size()) {
- BB = BFS.front(); BFS.pop();
- succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- if (NBB == End) {
- Edge e = getEdge(BB,0);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- // If the return edge has no value, try to read value from block.
- double bw = getExecutionCount(BB);
- if (bw != MissingValue) {
- setEdgeWeight(e,bw);
- ReturnWeight += bw;
- } else {
- // If both return edge and block provide no value, collect edge.
- ReturnEdges.insert(e);
- }
- } else {
- // If the return edge has a proper value, collect it.
- ReturnWeight += w;
- }
- }
- for (;NBB != End; ++NBB) {
- if (Unvisited.insert(*NBB).second) {
- BFS.push(*NBB);
- }
- }
- }
-
- while (Unvisited.size() > 0) {
- unsigned oldUnvisitedCount = Unvisited.size();
- bool FoundPath = false;
-
- // If there is only one edge left, calculate it.
- if (ReturnEdges.size() == 1) {
- ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
-
- Edge e = *ReturnEdges.begin();
- setEdgeWeight(e,ReturnWeight);
- setExecutionCount(e.first,ReturnWeight);
-
- Unvisited.erase(e.first);
- ReturnEdges.erase(e);
- continue;
- }
-
-    // Calculate all blocks where only one edge is missing; this may also
-    // resolve further return edges.
- std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- Edge e;
- if(CalculateMissingEdge(BB,e,true)) {
-        if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
-          // Force the (possibly derived) execution count into the cache.
-          setExecutionCount(BB,getExecutionCount(BB));
-        }
- Unvisited.erase(BB);
- if (e.first != 0 && e.second == 0) {
- ReturnEdges.erase(e);
- ReturnWeight += getEdgeWeight(e);
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
- // Estimate edge weights by dividing the flow proportionally.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- bool AllEdgesHaveSameReturn = true;
-      // Check each successor; these must all end up in the same (or an empty)
-      // return block, otherwise it is dangerous to do an estimation on them.
- for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- Succ != End; ++Succ) {
- Path P;
- GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[(const BasicBlock*)0]) {
- AllEdgesHaveSameReturn = false;
- }
- Dest = P[(const BasicBlock*)0];
- }
- if (AllEdgesHaveSameReturn) {
- if(EstimateMissingEdges(BB)) {
- Unvisited.erase(BB);
- break;
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Check if there is a path to a block that has a known value and redirect
-    // flow accordingly.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- // Fetch path.
- const BasicBlock *BB = *FI; ++FI;
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
-
- // Calculate incoming flow.
-      // Note: 'invalid' counts the incoming edges that do have a valid weight.
-      double iw = 0;
-      unsigned inmissing = 0, incount = 0, invalid = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- invalid++;
- } else {
-            // If the path contains the successor, this means it is a
-            // backedge; do not count it as missing.
- if (P.find(*NBB) == P.end())
- inmissing++;
- }
- incount++;
- }
- }
- if (inmissing == incount) continue;
- if (invalid == 0) continue;
-
- // Subtract (already) outgoing flow.
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw -= ew;
- }
- }
- }
- if (iw < 0) continue;
-
- // Check the receiving end of the path if it can handle the flow.
- double ow = getExecutionCount(Dest);
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- ow -= ew;
- }
- }
- }
- if (ow < 0) continue;
-
- // Determine how much flow shall be used.
- double ew = getEdgeWeight(getEdge(P[Dest],Dest));
- if (ew != MissingValue) {
-        ew = ew < ow ? ew : ow;
-        ew = ew < iw ? ew : iw;
- } else {
- if (inmissing == 0)
- ew = iw;
- }
-
- // Create flow.
- if (ew != MissingValue) {
- do {
- Edge e = getEdge(P[Dest],Dest);
- if (getEdgeWeight(e) == MissingValue) {
- setEdgeWeight(e,ew);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Calculate a block with self loop.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- bool SelfEdgeFound = false;
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (*NBB == BB) {
- SelfEdgeFound = true;
- break;
- }
- }
- if (SelfEdgeFound) {
- Edge e = getEdge(BB,BB);
- if (getEdgeWeight(e) == MissingValue) {
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- setEdgeWeight(e,iw * 10);
- FoundPath = true;
- }
- }
- }
- if (FoundPath) continue;
-
- // Determine backedges, set them to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- Path P;
- bool BackEdgeFound = false;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
- if (Dest == *NBB) {
- BackEdgeFound = true;
- break;
- }
- }
- if (BackEdgeFound) {
- Edge e = getEdge(Dest,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Channel flow to return block.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[(const BasicBlock*)0];
- if (!Dest) continue;
-
- if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
- // Calculate incoming flow.
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,iw);
- FoundPath = true;
- } else {
-            assert(0 && "Edge should not already have a value!");
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Speculatively set edges to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Edge e = getEdge(*NBB,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- break;
- }
- }
- }
- if (FoundPath) continue;
-
- errs() << "{";
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
-      errs() << BB->getName();
-      if (FI != FE)
-        errs() << ",";
- }
- errs() << "}";
-
- errs() << "ASSERT: could not repair function";
- assert(0 && "could not repair function");
- }
-
- EdgeWeights J = EdgeInformation[F];
- for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
- Edge e = EI->first;
-
- bool SuccFound = false;
- if (e.first != 0) {
- succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
- if (NBB == End) {
- if (0 == e.second) {
- SuccFound = true;
- }
- }
- for (;NBB != End; ++NBB) {
- if (*NBB == e.second) {
- SuccFound = true;
- break;
- }
- }
- if (!SuccFound) {
- removeEdge(e);
- }
- }
- }
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
- return O << MF->getFunction()->getName() << "(MF)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
- return O << MBB->getBasicBlock()->getName() << "(MB)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first;
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second;
- else
- O << "0";
-
- return O << ")";
-}
-
-} // namespace llvm
-
-//===----------------------------------------------------------------------===//
-// NoProfile ProfileInfo implementation
-//
-
-namespace {
- struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(ID) {
- initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
- "No Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
deleted file mode 100644
index f1f3e940..0000000
--- a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileInfoLoader.cpp - Load profile information from disk ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileInfoLoader class is used to load and represent profiling
-// information read in from the dump file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-// ByteSwap - Byteswap 'Var' if 'Really' is true.
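-// For example, ByteSwap(0x12345678, true) yields 0x78563412.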
-//
-static inline unsigned ByteSwap(unsigned Var, bool Really) {
- if (!Really) return Var;
- return ((Var & (255U<< 0U)) << 24U) |
- ((Var & (255U<< 8U)) << 8U) |
- ((Var & (255U<<16U)) >> 8U) |
- ((Var & (255U<<24U)) >> 24U);
-}
-
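-// AddCounts - Merge two counter values; Uncounted acts as the identity, so
-// e.g. AddCounts(Uncounted, 5) == 5 and AddCounts(2, 3) == 5.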
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- if (A == ProfileInfoLoader::Uncounted) return B;
- if (B == ProfileInfoLoader::Uncounted) return A;
- return A + B;
-}
-
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- std::vector<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries;
- if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
- NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
-
- // Read the counts...
- std::vector<unsigned> TempSpace(NumEntries);
-
- // Read in the block of data...
- if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
-
-  // Make sure we have enough space... The space is initialised to -1 to
-  // facilitate the loading of missing values for OptimalEdgeProfiling.
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
-
- // Accumulate the data we just read into the data.
- if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(TempSpace[i], Data[i]);
- }
- } else {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
- }
- }
-}
-
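-// Uncounted marks counters for which the profile file contains no value.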
-const unsigned ProfileInfoLoader::Uncounted = ~0U;
-
-// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
-// program if the file is invalid or broken.
-//
-ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0) {
- errs() << ToolName << ": Error opening '" << Filename << "': ";
- perror(0);
- exit(1);
- }
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ByteSwap(PacketType, ShouldByteSwap);
-
- switch (PacketType) {
- case ArgumentInfo: {
- unsigned ArgLength;
- if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
-
- // Read in the arguments...
- std::vector<char> Chars(ArgLength+4);
-
- if (ArgLength)
- if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
- break;
- }
-
- case FunctionInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
- break;
-
- case BlockInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- case OptEdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
- break;
-
- case BBTraceInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
- break;
-
- default:
- errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
- exit(1);
- }
- }
-
- fclose(F);
-}
-
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
deleted file mode 100644
index 346f8d6..0000000
--- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// loads the information from a profile dump file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-
-static cl::opt<std::string>
-ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-loader"));
-
-namespace {
- class LoaderPass : public ModulePass, public ProfileInfo {
- std::string Filename;
- std::set<Edge> SpanningTree;
- std::set<const BasicBlock*> BBisUnvisited;
- unsigned ReadCount;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit LoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileInfoFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information loader";
- }
-
-    // recurseBasicBlock() - Calculates the edge weights for as many basic
-    // blocks as possible.
- virtual void recurseBasicBlock(const BasicBlock *BB);
- virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
- virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- /// run - Load the profile information from the specified file.
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char LoaderPass::ID = 0;
-INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
- "Load profile information from llvmprof.out", false, true, false)
-
-char &llvm::ProfileLoaderPassID = LoaderPass::ID;
-
-ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
-
-/// createProfileLoaderPass - This function returns a Pass that loads the
-/// profiling information for the module from the specified filename, making it
-/// available to the optimizers.
-Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
- return new LoaderPass(Filename);
-}
-
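-// readEdgeOrRemember - Add the weight of edge to count if it is known;
-// otherwise remember the edge in tocalc and increment uncalc.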
-void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
- unsigned &uncalc, double &count) {
- double w;
- if ((w = getEdgeWeight(edge)) == MissingValue) {
- tocalc = edge;
- uncalc++;
- } else {
- count+=w;
- }
-}
-
-// recurseBasicBlock - Visits all neighbours of a block and then tries to
-// calculate the missing edge values.
-void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
-
- // break recursion if already visited
- if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
- BBisUnvisited.erase(BB);
- if (!BB) return;
-
- for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-
- Edge tocalc;
- if (CalculateMissingEdge(BB, tocalc)) {
- SpanningTree.erase(tocalc);
- }
-}
-
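-// readEdge - Consume the next raw counter from ECs for edge e; counted
-// values go into the edge information, uncounted edges into the spanning
-// tree for later calculation.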
-void LoaderPass::readEdge(ProfileInfo::Edge e,
- std::vector<unsigned> &ECs) {
- if (ReadCount < ECs.size()) {
- double weight = ECs[ReadCount++];
- if (weight != ProfileInfoLoader::Uncounted) {
-    // Here the data realm changes from the unsigned of the file to the
-    // double of the ProfileInfo. This conversion is safe because we know
-    // that everything that is representable in unsigned is also representable
-    // in double.
- EdgeInformation[getFunction(e)][e] += (double)weight;
-
- DEBUG(dbgs() << "--Read Edge Counter for " << e
- << " (# "<< (ReadCount-1) << "): "
- << (unsigned)getEdgeWeight(e) << "\n");
- } else {
- // This happens only if reading optimal profiling information, not when
- // reading regular profiling information.
- SpanningTree.insert(e);
- }
- }
-}
-
-bool LoaderPass::runOnModule(Module &M) {
- ProfileInfoLoader PIL("profile-loader", Filename);
-
- EdgeInformation.clear();
- std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- Counters = PIL.getRawOptimalEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- readEdge(getEdge(BB,0), Counters);
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- while (SpanningTree.size() > 0) {
-
- unsigned size = SpanningTree.size();
-
- BBisUnvisited.clear();
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- BBisUnvisited.insert(ei->first);
- BBisUnvisited.insert(ei->second);
- }
- while (BBisUnvisited.size() > 0) {
- recurseBasicBlock(*BBisUnvisited.begin());
- }
-
- if (SpanningTree.size() == size) {
- DEBUG(dbgs()<<"{");
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- DEBUG(dbgs()<< *ei <<",");
- }
- assert(0 && "No edge calculated!");
- }
-
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- BlockInformation.clear();
- Counters = PIL.getRawBlockCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReadCount < Counters.size())
-          // Here the data realm changes from the unsigned of the file to the
-          // double of the ProfileInfo. This conversion is safe because we know
-          // that everything that is representable in unsigned is also
-          // representable in double.
- BlockInformation[F][BB] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- FunctionInformation.clear();
- Counters = PIL.getRawFunctionCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- if (ReadCount < Counters.size())
-        // Here the data realm changes from the unsigned of the file to the
-        // double of the ProfileInfo. This conversion is safe because we know
-        // that everything that is representable in unsigned is also
-        // representable in double.
- FunctionInformation[F] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
deleted file mode 100644
index c8896de..0000000
--- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that checks profiling information for
-// plausibility.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-verifier"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-static cl::opt<bool,false>
-ProfileVerifierDisableAssertions("profile-verifier-noassert",
- cl::desc("Disable assertions"));
-
-namespace {
- template<class FType, class BType>
- class ProfileVerifierPassT : public FunctionPass {
-
- struct DetailedBlockInfo {
- const BType *BB;
- double BBWeight;
- double inWeight;
- int inCount;
- double outWeight;
- int outCount;
- };
-
- ProfileInfoT<FType, BType> *PI;
- std::set<const BType*> BBisVisited;
- std::set<const FType*> FisVisited;
- bool DisableAssertions;
-
-    // When debugging is enabled, the verifier prints a whole slew of debug
-    // information; otherwise it is just the assert. These are all the helper
- // functions.
- bool PrintedDebugTree;
- std::set<const BType*> BBisPrinted;
- void debugEntry(DetailedBlockInfo*);
- void printDebugInfo(const BType *BB);
-
- public:
- static char ID; // Class identification, replacement for typeinfo
-
- explicit ProfileVerifierPassT () : FunctionPass(ID) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- DisableAssertions = ProfileVerifierDisableAssertions;
- }
- explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
- DisableAssertions(da) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<ProfileInfoT<FType, BType> >();
- }
-
- const char *getPassName() const {
- return "Profiling information verifier";
- }
-
- /// run - Verify the profile information.
- bool runOnFunction(FType &F);
- void recurseBasicBlock(const BType*);
-
- bool exitReachable(const FType*);
- double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
- void CheckValue(bool, const char*, DetailedBlockInfo*);
- };
-
- typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
-
- if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
- double BBWeight = PI->getExecutionCount(BB);
- if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
- double inWeight = 0;
- int inCount = 0;
- std::set<const BType*> ProcessedPreds;
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated in-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- inWeight += EdgeWeight;
- inCount++;
- }
- }
- double outWeight = 0;
- int outCount = 0;
- std::set<const BType*> ProcessedSuccs;
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated out-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- outWeight += EdgeWeight;
- outCount++;
- }
- }
- dbgs() << "Block " << BB->getName() << " in "
- << BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",BBWeight) << ","
- << "inWeight=" << format("%20.20g",inWeight) << ","
- << "inCount=" << inCount << ","
- << "outWeight=" << format("%20.20g",outWeight) << ","
- << "outCount" << outCount << "\n";
-
- // mark as visited and recurse into subnodes
- BBisPrinted.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- printDebugInfo(*bbi);
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
- << DI->BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
- << "inWeight=" << format("%20.20g",DI->inWeight) << ","
- << "inCount=" << DI->inCount << ","
- << "outWeight=" << format("%20.20g",DI->outWeight) << ","
- << "outCount=" << DI->outCount << "\n";
- if (!PrintedDebugTree) {
- PrintedDebugTree = true;
- printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
- }
- }
-
- // This compares A and B for equality.
- static bool Equals(double A, double B) {
- return A == B;
- }
-
-  // This checks if the function "exit" is reachable from a given function
-  // via calls; this is necessary to check if a profile is valid despite the
- // counts not fitting exactly.
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
- if (!F) return false;
-
- if (FisVisited.count(F)) return false;
-
- FType *Exit = F->getParent()->getFunction("exit");
- if (Exit == F) {
- return true;
- }
-
- FisVisited.insert(F);
- bool exits = false;
- for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- exits |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- exits = true;
- }
- if (exits) break;
- }
- }
- return exits;
- }
-
- #define ASSERTMESSAGE(M) \
- { dbgs() << "ASSERT:" << (M) << "\n"; \
- if (!DisableAssertions) assert(0 && (M)); }
-
- template<class FType, class BType>
- double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has missing value");
- return 0;
- } else {
- if (EdgeWeight < 0) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has negative value");
- }
- return EdgeWeight;
- }
- }
-
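-  // CheckValue - If Error is set, print the debug information for the
-  // offending block and raise ASSERTMESSAGE with the given message.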
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
- const char *Message,
- DetailedBlockInfo *DI) {
- if (Error) {
- DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getName() << " in Function "
- << DI->BB->getParent()->getName() << ": ";
- ASSERTMESSAGE(Message);
- }
- return;
- }
-
- // This calculates the Information for a block and then recurses into the
- // successors.
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
-
- // Break the recursion by remembering all visited blocks.
- if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
- // Use a data structure to store all the information, this can then be handed
- // to debug printers.
- DetailedBlockInfo DI;
- DI.BB = BB;
- DI.outCount = DI.inCount = 0;
- DI.inWeight = DI.outWeight = 0;
-
- // Read predecessors.
- std::set<const BType*> ProcessedPreds;
- const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
- // If there are none, check for (0,BB) edge.
- if (bpi == bpe) {
- DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
- DI.inCount++;
- }
- for (;bpi != bpe; ++bpi) {
- if (ProcessedPreds.insert(*bpi).second) {
- DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
- DI.inCount++;
- }
- }
-
- // Read successors.
- std::set<const BType*> ProcessedSuccs;
- succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-    // If there is a (0,BB) edge, consider it too. (This is done not only when
- // there are no successors, but every time; not every function contains
- // return blocks with no successors (think loop latch as return block)).
- double w = PI->getEdgeWeight(PI->getEdge(BB,0));
- if (w != ProfileInfoT<FType, BType>::MissingValue) {
- DI.outWeight += w;
- DI.outCount++;
- }
- for (;bbi != bbe; ++bbi) {
- if (ProcessedSuccs.insert(*bbi).second) {
- DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
- DI.outCount++;
- }
- }
-
- // Read block weight.
- DI.BBWeight = PI->getExecutionCount(BB);
- CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
- "BasicBlock has missing value", &DI);
- CheckValue(DI.BBWeight < 0,
- "BasicBlock has negative value", &DI);
-
- // Check if this block is a setjmp target.
- bool isSetJmpTarget = false;
- if (DI.outWeight > DI.inWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F && (F->getName() == "_setjmp")) {
- isSetJmpTarget = true; break;
- }
- }
- }
- }
- // Check if this block is eventually reaching exit.
- bool isExitReachable = false;
- if (DI.inWeight > DI.outWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- FisVisited.clear();
- isExitReachable |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- isExitReachable = true;
- }
- if (isExitReachable) break;
- }
- }
- }
-
- if (DI.inCount > 0 && DI.outCount == 0) {
- // If this is a block with no successors.
- if (!isSetJmpTarget) {
- CheckValue(!Equals(DI.inWeight,DI.BBWeight),
- "inWeight and BBWeight do not match", &DI);
- }
- } else if (DI.inCount == 0 && DI.outCount > 0) {
- // If this is a block with no predecessors.
- if (!isExitReachable)
- CheckValue(!Equals(DI.BBWeight,DI.outWeight),
- "BBWeight and outWeight do not match", &DI);
- } else {
- // If this block has successors and predecessors.
- if (DI.inWeight > DI.outWeight && !isExitReachable)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- }
-
-    // Mark this block as visited, then recurse into successors.
- BBisVisited.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- recurseBasicBlock(*bbi);
- }
- }
-
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
- PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
- if (!PI)
- ASSERTMESSAGE("No ProfileInfo available");
-
- // Prepare global variables.
- PrintedDebugTree = false;
- BBisVisited.clear();
-
- // Fetch entry block and recurse into it.
- const BType *entry = &F.getEntryBlock();
- recurseBasicBlock(entry);
-
- if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
- ASSERTMESSAGE("Function count and entry block count do not match");
-
- return false;
- }
-
- template<class FType, class BType>
- char ProfileVerifierPassT<FType, BType>::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-
-namespace llvm {
- FunctionPass *createProfileVerifierPass() {
- return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
- }
-}
-
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 8577025..5635688 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -9,6 +9,7 @@
// Detects single entry single exit regions in the control flow graph.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "region"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -17,12 +18,9 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-
-#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
-
-#include <set>
#include <algorithm>
+#include <set>
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index f876748..0a02f4e 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -585,6 +585,9 @@ namespace {
// Lexicographically compare n-ary expressions.
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
for (unsigned i = 0; i != LNumOps; ++i) {
if (i >= RNumOps)
return 1;
@@ -758,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
unsigned CalculationBits = W + T;
// Calculate 2^T, at width T+W.
- APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+ APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
// Calculate the multiplicative inverse of K! / 2^T;
// this multiplication factor will perform the exact division by
@@ -1380,7 +1383,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
- SmallVector<const SCEV *, 8> &NewOps,
+ SmallVectorImpl<const SCEV *> &NewOps,
APInt &AccumulatedConstant,
const SCEV *const *Ops, size_t NumOperands,
const APInt &Scale,
@@ -1628,7 +1631,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
@@ -2587,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
- Constant *C = ConstantExpr::getAlignOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
+ if (TD) {
+ return getConstant(IntTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
+ }
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
- Constant *FieldNo) {
- Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2700,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy())
+ if (Ty->isIntegerTy()) {
return Ty;
+ }
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
+
+ if (TD)
+ return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
@@ -2715,13 +2705,51 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
return &CouldNotCompute;
}
+namespace {
+  // Helper class working with SCEVTraversal to figure out if a SCEV contains
+  // a SCEVUnknown with a null value-pointer. FindInvalidSCEVUnknown::FindOne
+  // is set iff it finds such a SCEVUnknown.
+ //
+ struct FindInvalidSCEVUnknown {
+ bool FindOne;
+ FindInvalidSCEVUnknown() { FindOne = false; }
+ bool follow(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return false;
+ case scUnknown:
+ if (!cast<SCEVUnknown>(S)->getValue())
+ FindOne = true;
+ return false;
+ default:
+ return true;
+ }
+ }
+ bool isDone() const { return FindOne; }
+ };
+}
+
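+// checkValidity - Return true if S contains no SCEVUnknown with a null
+// value-pointer, i.e. the cached expression is still usable.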
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
+ FindInvalidSCEVUnknown F;
+ SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
+ ST.visitAll(S);
+
+ return !F.FindOne;
+}
+
/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
/// expression and create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- ValueExprMapType::const_iterator I = ValueExprMap.find_as(V);
- if (I != ValueExprMap.end()) return I->second;
+ ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+ if (I != ValueExprMap.end()) {
+ const SCEV *S = I->second;
+ if (checkValidity(S))
+ return S;
+ else
+ ValueExprMap.erase(I);
+ }
const SCEV *S = createSCEV(V);
// The process of creating a SCEV for V may have caused other SCEVs
@@ -3060,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
- } else if (const GEPOperator *GEP =
- dyn_cast<GEPOperator>(BEValueV)) {
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
- // pointer.
- if (GEP->isInBounds())
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds()) {
Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+ } else if (const SubOperator *OBO =
+ dyn_cast<SubOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
}
const SCEV *StartVal = getSCEV(StartValueV);
@@ -3136,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
- bool isInBounds = GEP->isInBounds();
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
- Value *Base = GEP->getOperand(0);
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
- return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
@@ -3158,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
- isInBounds ? SCEV::FlagNSW :
- SCEV::FlagAnyWrap);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -3183,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ return getAddExpr(BaseS, TotalOffset, Wrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3551,7 +3587,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
if (!U->getValue()->getType()->isIntegerTy() && !TD)
return setSignedRange(U, ConservativeResult);
unsigned NS = ComputeNumSignBits(U->getValue(), TD);
- if (NS == 1)
+ if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -3751,7 +3787,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
@@ -3769,7 +3805,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
@@ -3947,7 +3983,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
/// depends on a NSW assumption, and we would only fall back to a conservative
/// trip count in that case.
unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
+getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
if (!ExitCount)
@@ -3976,7 +4012,7 @@ getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) {
+getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
const SCEV *ExitCount = getBackedgeTakenCount(L);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -4575,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_SLT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_SGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_ULT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_UGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false, IsSubExpr);
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -5031,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
- std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
- std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
- if (!Pair.second)
- return Pair.first->second ? Pair.first->second : V;
-
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second ? Values[u].second : V;
+ }
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- ValuesAtScopes[V][L] = C;
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = C;
+ break;
+ }
+ }
return C;
}
@@ -5078,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (C->getType()->isPointerTy())
- C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2) return 0;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
@@ -5097,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (C2->getType()->isPointerTy())
return 0;
- if (C->getType()->isPointerTy()) {
- if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
C = ConstantExpr::getGetElementPtr(C, C2);
@@ -6295,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
- const SCEV *End,
- const SCEV *Step,
- bool NoWrap) {
- assert(!isKnownNegative(Step) &&
- "This code doesn't handle negative strides yet!");
-
- Type *Ty = Start->getType();
-
- // When Start == End, we have an exact BECount == 0. Short-circuit this case
- // here because SCEV may not be able to determine that the unsigned division
- // after rounding is zero.
- if (Start == End)
- return getConstant(Ty, 0);
-
- const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
- const SCEV *Diff = getMinusSCEV(End, Start);
- const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
- // Add an adjustment to the difference between End and Start so that
- // the division will effectively round up.
- const SCEV *Add = getAddExpr(Diff, RoundUp);
-
- if (!NoWrap) {
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- Type *WideTy = IntegerType::get(getContext(),
- getTypeSizeInBits(Ty) + 1);
- const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
- const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+// Verify whether a linear IV with a positive stride can overflow in a
+// less-than comparison, given the invariant term of the comparison, the
+// stride, and any NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
}
- return getUDivExpr(Add, Step);
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
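
A worked instance of the unsigned branch above, on hypothetical 8-bit values: in an "i < RHS" loop the last in-bounds IV value is at most MaxRHS - 1, so the increment can wrap exactly when MaxRHS exceeds UMAX - (MaxStride - 1).

#include <iostream>

// 8-bit restatement of the unsigned check above; not the APInt code path.
static bool ivMayWrapOnULT(unsigned MaxRHS, unsigned MaxStride) {
  return MaxRHS > 255u - (MaxStride - 1);
}

int main() {
  std::cout << ivMayWrapOnULT(250, 10) << "\n"; // 1: 249 + 10 wraps past 255
  std::cout << ivMayWrapOnULT(246, 10) << "\n"; // 0: 245 + 10 == 255, no wrap
}
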
+
+// Verify whether a linear IV with a negative stride can overflow in a
+// greater-than comparison, given the invariant term of the comparison,
+// the stride, and any NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
+ }
+
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count knowing the interval difference, the
+// stride and presence of the equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getConstant(Step->getType(), 1);
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
}
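
In plain integers, computeBECount is a rounding division: for "i < End" the count is ceil(Delta / Step), and for "i <= End" one extra Step is folded in before dividing. A small sketch with assumed concrete numbers:

#include <cstdint>
#include <iostream>

// Plain-integer restatement of computeBECount: Delta is (End - Start),
// Step is the positive stride, Equality selects "<=" versus "<".
static uint64_t backedgeCount(uint64_t Delta, uint64_t Step, bool Equality) {
  return Equality ? (Delta + Step) / Step         // i <= End
                  : (Delta + (Step - 1)) / Step;  // i <  End, rounded up
}

int main() {
  // for (i = 0; i < 9; i += 3)  executes for i = 0, 3, 6     -> 3
  std::cout << backedgeCount(9, 3, false) << "\n";
  // for (i = 0; i <= 9; i += 3) executes for i = 0, 3, 6, 9  -> 4
  std::cout << backedgeCount(9, 3, true) << "\n";
}
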
/// HowManyLessThans - Return the number of times a backedge containing the
@@ -6345,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned,
+ const Loop *L, bool IsSigned,
bool IsSubExpr) {
- // Only handle: "ADDREC < LoopInvariant".
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!AddRec || AddRec->getLoop() != L)
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Avoid weird loops
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- // Check to see if we have a flag which makes analysis easy.
- bool NoWrap = false;
- if (!IsSubExpr) {
- NoWrap = AddRec->getNoWrapFlags(
- (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
- | SCEV::FlagNW));
- }
- if (AddRec->isAffine()) {
- unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV *Step = AddRec->getStepRecurrence(*this);
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
- if (Step->isZero())
- return getCouldNotCompute();
- if (Step->isOne()) {
- // With unit stride, the iteration never steps past the limit value.
- } else if (isKnownPositive(Step)) {
- // Test whether a positive iteration can step past the limit
- // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even
- // though the value after a wrap is undefined, it's not undefined
- // behavior, so if wrap does occur, the loop could either terminate or
- // loop infinitely, but in either case, the loop is guaranteed to
- // iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getConstant(Step->getType(), 1);
- if (isSigned) {
- APInt Max = APInt::getSignedMaxValue(BitWidth);
- if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
- .slt(getSignedRange(RHS).getSignedMax()))
- return getCouldNotCompute();
- } else {
- APInt Max = APInt::getMaxValue(BitWidth);
- if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
- .ult(getUnsignedRange(RHS).getUnsignedMax()))
- return getCouldNotCompute();
- }
- } else
- // TODO: Handle negative strides here and below.
- return getCouldNotCompute();
+ const SCEV *Stride = IV->getStepRecurrence(*this);
- // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
- // m. So, we count the number of iterations in which {n,+,s} < m is true.
- // Note that we cannot simply return max(m-n,0)/s because it's not safe to
- // treat m-n as signed nor unsigned due to overflow possibility.
-
- // First, we get the value of the LHS in the first iteration: n
- const SCEV *Start = AddRec->getOperand(0);
-
- // Determine the minimum constant start value.
- const SCEV *MinStart = getConstant(isSigned ?
- getSignedRange(Start).getSignedMin() :
- getUnsignedRange(Start).getUnsignedMin());
-
- // If we know that the condition is true in order to enter the loop,
- // then we know that it will run exactly (m-n)/s times. Otherwise, we
- // only know that it will execute (max(m,n)-n)/s times. In both cases,
- // the division must round up.
- const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
- End = isSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
-
- // Determine the maximum constant end value.
- const SCEV *MaxEnd = getConstant(isSigned ?
- getSignedRange(End).getSignedMax() :
- getUnsignedRange(End).getUnsignedMax());
-
- // If MaxEnd is within a step of the maximum integer value in its type,
- // adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling division of (N+(step-1))/step to
- // compute the correct value.
- const SCEV *StepMinusOne = getMinusSCEV(Step,
- getConstant(Step->getType(), 1));
- MaxEnd = isSigned ?
- getSMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
- StepMinusOne)) :
- getUMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
- StepMinusOne));
-
- // Finally, we subtract these two values and divide, rounding up, to get
- // the number of times the backedge is executed.
- const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
- // The maximum backedge count is similar, except using the minimum start
- // value and the maximum end value.
- // If we already have an exact constant BECount, use it instead.
- const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
- : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
- // If the stride is nonconstant, and NoWrap == true, then
- // getBECount(MinStart, MaxEnd) may not compute. This would result in an
- // exact BECount and invalid MaxBECount, which should be avoided to catch
- // more optimization opportunities.
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
-
- return ExitLimit(BECount, MaxBECount);
- }
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
- return getCouldNotCompute();
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing optimization in the presence
+ // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+ // Although End can be a MAX expression, we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
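
For orientation, a hypothetical loop of the shape HowManyLessThans handles: assuming the usual nsw flags, the IV is the affine AddRec {0,+,4}<%loop>, the loop-invariant n is the RHS, and after the smax guard supplies End the exit count falls out of computeBECount as roughly (n + 3) /u 4.

// IV {0,+,4} compared against loop-invariant n: the "IV < Invariant" case.
void zeroEveryFourth(int *A, int n) {
  for (int i = 0; i < n; i += 4)
    A[i] = 0;
}
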
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool IsSubExpr) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Avoid weird loops
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing optimization in the presence
+ // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+ // Although End can be a MIN expression, we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6586,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
return SE.getCouldNotCompute();
}
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue().abs();
+ APInt B = C2->getValue()->getValue().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::sdiv(A, B);
+}
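
The three helpers above differ only in how they equalize APInt bit widths (zext for the unsigned gcd of absolute values, sext for the signed rem/div); the arithmetic itself is ordinary. A native-integer sketch of the gcd case, where promotion is implicit:

#include <cstdint>
#include <cstdlib>
#include <iostream>

// Euclid on absolute values, as gcd() above computes after widening both
// APInts to a common bit width.
static uint64_t gcdAbs(int64_t A, int64_t B) {
  uint64_t X = (uint64_t)std::llabs(A), Y = (uint64_t)std::llabs(B);
  while (Y) {
    uint64_t T = X % Y;
    X = Y;
    Y = T;
  }
  return X;
}

int main() {
  std::cout << gcdAbs(-4, 6) << "\n"; // 2
}
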
+
+namespace {
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+public:
+ // Pattern match Step into Start. When Step is a multiply expression, find
+ // the largest subexpression of Step that appears in Start. When Start is an
+ // add expression, try to match Step in the subexpressions of Start;
+ // non-matching subexpressions are returned under Remainder.
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step, const SCEV **Remainder) {
+ assert(Remainder && "Remainder should not be NULL");
+ SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
+ const SCEV *Res = R.visit(Start);
+ *Remainder = R.Remainder;
+ return Res;
+ }
+
+ SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
+ : SE(S), GCD(G), Remainder(R) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant || Constant == Zero)
+ return GCD;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
+ const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
+ if (Res != One)
+ return Res;
+
+ Remainder = SE.getConstant(srem(Constant, CGCD));
+ Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
+ Res = SE.getConstant(gcd(Constant, CGCD));
+ return Res;
+ }
+
+ // When GCD is not a constant, it could be that the GCD is an Add, Mul,
+ // AddRec, etc., in which case we want to find out how many times the
+ // Constant divides the GCD: we then return that as the new GCD.
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+
+ if (Res == One || Rem != Zero) {
+ Remainder = Constant;
+ return One;
+ }
+
+ assert(isa<SCEVConstant>(Res) && "Res should be a constant");
+ Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
+ return Res;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+
+ // FIXME: There may be ambiguous situations: for instance,
+ // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
+ // The order in which the AddExpr is traversed computes a different GCD
+ // and Remainder.
+ if (Res != One)
+ GCD = Res;
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+ }
+
+ return GCD;
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (Expr->getOperand(i) == GCD)
+ return GCD;
+ }
+
+ // If we have not returned yet, it means that GCD is not part of Expr.
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem != Zero)
+ // GCD does not divide Expr->getOperand(i).
+ continue;
+
+ if (Res == GCD)
+ return GCD;
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ if (PartialGCD == GCD)
+ return GCD;
+ }
+
+ if (PartialGCD != One)
+ return PartialGCD;
+
+ Remainder = Expr;
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
+ if (!Mul)
+ return PartialGCD;
+
+ // When the GCD is a multiply expression, try to decompose it:
+ // this occurs when Step does not divide the Start expression
+ // as in: {(-4 + (3 * %m)),+,(2 * %m)}
+ for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
+ if (Rem == Zero) {
+ Remainder = Rem;
+ return Res;
+ }
+ }
+
+ return PartialGCD;
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ if (!Expr->isAffine()) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+
+ Rem = Zero;
+ Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
+ if (Rem != Zero) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ return Res;
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return One;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Remainder, *Zero, *One;
+};
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
+public:
+ // Remove from Start all multiples of Step.
+ static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step) {
+ SCEVDivision D(SE, Step);
+ const SCEV *Rem = D.Zero;
+ (void)Rem;
+ // The division is guaranteed to succeed: Step should divide Start with no
+ // remainder.
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
+ "Step should divide Start with no remainder.");
+ return D.visit(Start);
+ }
+
+ SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant)
+ return One;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
+ return SE.getConstant(sdiv(Constant, CGCD));
+ return Constant;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getAddExpr(Operands);
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ bool FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+
+ SmallVector<const SCEV *, 2> Operands;
+ if (FoundGCDTerm) {
+ FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (FoundGCDTerm)
+ Operands.push_back(Expr->getOperand(i));
+ else if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+ else
+ Operands.push_back(Expr->getOperand(i));
+ }
+ } else {
+ FoundGCDTerm = false;
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (PartialGCD == GCD) {
+ Operands.push_back(Expr->getOperand(i));
+ continue;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem == Zero) {
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+ } else {
+ Operands.push_back(Expr->getOperand(i));
+ }
+ }
+ }
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getMulExpr(Operands);
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ assert(Expr->isAffine() && "Expr should be affine");
+
+ const SCEV *Start = divide(SE, Expr->getStart(), GCD);
+ const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+
+ return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Zero, *One;
+};
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization,
+/// which is the start offset of the array. The SCEV->delinearize algorithm
+/// computes the multiples of SCEV coefficients: that is, a pattern match of
+/// subexpressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identify the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally, delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] in the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (!this->isAffine())
+ return this;
+
+ const SCEV *Start = this->getStart();
+ const SCEV *Step = this->getStepRecurrence(SE);
+
+ // Build the SCEV representation of the canonical induction variable in the
+ // loop of this SCEV.
+ const SCEV *Zero = SE.getConstant(this->getType(), 0);
+ const SCEV *One = SE.getConstant(this->getType(), 1);
+ const SCEV *IV =
+ SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+ DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+ // Currently we fail to delinearize when the stride of this SCEV is 1. We
+ // could choose not to fail in this case: we could just return 1 for the size
+ // of the subscript, and this same SCEV for the access function.
+ if (Step == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+ const SCEV *Remainder = NULL;
+ const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+ DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+ DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+ // Same remark as above: we currently fail the delinearization, although we
+ // can very well handle this special case.
+ if (GCD == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+ // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
+ // Quotient is what will be used in the next subscript delinearization.
+ const SCEV *Quotient =
+ SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+ DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+ const SCEV *Rem;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+ // Recursively call delinearize on the Quotient until there are no more
+ // multiples that can be recognized.
+ Rem = AR->delinearize(SE, Subscripts, Sizes);
+ else
+ Rem = Quotient;
+
+ // Scale up the canonical induction variable IV by whatever remains from the
+ // Step after division by the GCD: the GCD is the size of the sub-array.
+ if (Step != GCD) {
+ Step = SCEVDivision::divide(SE, Step, GCD);
+ IV = SE.getMulExpr(IV, Step);
+ }
+ // The access function in the current subscript is computed as the canonical
+ // induction variable IV (potentially scaled up by the step) and offset by
+ // Rem, the offset of delinearization in the sub-array.
+ const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+ // Record the access function and the size of the current subscript.
+ Subscripts.push_back(Index);
+ Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+ int Size = Sizes.size();
+ DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+ DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+ for (int i = 0; i < Size - 1; i++)
+ DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+ DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+ DEBUG(dbgs() << "ArrayRef");
+ for (int i = 0; i < Size; i++)
+ DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+ DEBUG(dbgs() << "\n)\n");
+#endif
+
+ return Remainder;
+}
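
A hedged usage sketch, in the spirit of the DelinearizationPass the comment above describes; SE and Ptr (the address operand of some load or store) are assumed to come from a surrounding pass, and error handling is elided:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printAccessFunctions(ScalarEvolution &SE, Value *Ptr) {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
  if (!AR)
    return; // address is not an analyzable recurrence
  SmallVector<const SCEV *, 3> Subscripts, Sizes;
  const SCEV *BaseOffset = AR->delinearize(SE, Subscripts, Sizes);
  errs() << "Base offset: " << *BaseOffset << "\n";
  errs() << "ArrayRef";
  for (unsigned i = 0, e = Subscripts.size(); i != e; ++i)
    errs() << "[" << *Subscripts[i] << "]";
  errs() << "\n";
}
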
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
@@ -6642,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), FirstUnknown(0) {
+ : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -6780,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
- std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, LoopVariant));
- if (!Pair.second)
- return Pair.first->second;
-
+ SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
- return LoopDispositions[S][L] = D;
+ SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::LoopDisposition
@@ -6879,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
- std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
- Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
- if (!Pair.second)
- return Pair.first->second;
-
+ SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == BB)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
- return BlockDispositions[S][BB] = D;
+ SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == BB) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::BlockDisposition
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index fcd7ce2..86a557b 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
- BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ BO->setDebugLoc(Loc);
rememberInstruction(BO);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return BO;
}
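
This hunk is the first of many below that replace manual SaveInsertBB/SaveInsertPt bookkeeping with the RAII InsertPointGuard, which restores the builder's insertion point on every exit path. A minimal sketch of the pattern, with hypothetical names:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitHoistedAdd(IRBuilder<> &Builder, Instruction *HoistPos,
                             Value *LHS, Value *RHS) {
  // The guard snapshots the current insertion point; however we return,
  // the builder is restored when Guard goes out of scope.
  IRBuilder<>::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(HoistPos);
  return Builder.CreateAdd(LHS, RHS);
}
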
@@ -294,8 +291,8 @@ static bool FactorOutConstant(const SCEV *&S,
const SCEV *Start = A->getStart();
if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
return false;
- // FIXME: can use A->getNoWrapFlags(FlagNW)
- S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW));
return true;
}
@@ -348,8 +345,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
AddRecs.push_back(SE.getAddRecExpr(Zero,
A->getStepRecurrence(SE),
A->getLoop(),
- // FIXME: A->getNoWrapFlags(FlagNW)
- SCEV::FlagAnyWrap));
+ A->getNoWrapFlags(SCEV::FlagNW)));
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
Ops[i] = Zero;
Ops.append(Add->op_begin(), Add->op_end());
@@ -407,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
+ Type *IntPtrTy = SE.TD
+ ? SE.TD->getIntPtrType(PTy)
+ : Type::getInt64Ty(PTy->getContext());
+
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
@@ -417,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -549,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -566,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return GEP;
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -611,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
rememberInstruction(GEP);
// Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Builder.restoreIP(SaveInsertPt);
return expand(SE.getAddExpr(Ops));
}
@@ -846,8 +839,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
A->getStepRecurrence(SE),
A->getLoop(),
- // FIXME: A->getNoWrapFlags(FlagNW)
- SCEV::FlagAnyWrap));
+ A->getNoWrapFlags(SCEV::FlagNW)));
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
@@ -1078,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Another AddRec may need to be recursively expanded below. For example, if
// this AddRec is quadratic, the StepV may itself be an AddRec in this
@@ -1137,14 +1128,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
-
+ if (isa<OverflowingBinaryOperator>(IncV)) {
+ if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+ cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
+ if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+ cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
+ }
PN->addIncoming(IncV, Pred);
}
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
// After expanding subexpressions, restore the PostIncLoops set so the caller
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
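
The new block above transfers the recurrence's no-wrap facts onto the materialized increment so later passes retain them. A small sketch of the same transfer, factored out under hypothetical names:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

// Copy known no-wrap facts onto a freshly expanded IV increment; a no-op
// when the increment is not an overflowing binary operator (e.g. a GEP).
static void transferWrapFlags(Value *IncV, bool HasNUW, bool HasNSW) {
  if (isa<OverflowingBinaryOperator>(IncV)) {
    BinaryOperator *BO = cast<BinaryOperator>(IncV);
    if (HasNUW) BO->setHasNoUnsignedWrap();
    if (HasNSW) BO->setHasNoSignedWrap();
  }
}
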
@@ -1180,8 +1172,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
Normalized = cast<SCEVAddRecExpr>(
SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
Normalized->getLoop(),
- // FIXME: Normalized->getNoWrapFlags(FlagNW)
- SCEV::FlagAnyWrap));
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
}
// Strip off any non-loop-dominating component from the addrec step.
@@ -1191,11 +1182,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
PostLoopScale = Step;
Step = SE.getConstant(Normalized->getType(), 1);
Normalized =
- cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
- Normalized->getLoop(),
- // FIXME: Normalized
- // ->getNoWrapFlags(FlagNW)
- SCEV::FlagAnyWrap));
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(
+ Start, Step, Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
}
// Expand the core addrec. If we need post-loop scaling, force it to
@@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
!ExpandTy->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
- // Expand the step somewhere that dominates the loop header.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
- // Restore the insertion point to the place where the caller has
- // determined dominates all uses.
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ BuilderType::InsertPointGuard Guard(Builder);
+ StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ }
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
}
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
@@ -1288,18 +1277,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
- // FIXME: S->getNoWrapFlags(FlagNW)
- SCEV::FlagAnyWrap));
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ BuilderType::InsertPointGuard Guard(Builder);
while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1307,8 +1293,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (!S->getStart()->isZero()) {
SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
NewOps[0] = SE.getConstant(Ty, 0);
- // FIXME: can use S->getNoWrapFlags()
- const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
+ S->getNoWrapFlags(SCEV::FlagNW));
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
@@ -1343,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Header->begin());
rememberInstruction(CanonicalIV);
+ SmallSet<BasicBlock *, 4> PredSeen;
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP))
+ continue;
+
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
@@ -1528,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (I != InsertedExpressions.end())
return I->second;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
@@ -1542,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
// a postinc expansion, it could be reused by a non postinc user, but only if
// its insertion point was already at the head of the loop.
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
-
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1554,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedValues.insert(I);
}
-void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- Builder.SetInsertPoint(BB, I);
-}
-
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
@@ -1573,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
// Emit code for it.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1725,28 +1705,43 @@ namespace {
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
bool IsUnsafe;
- SCEVFindUnsafe(): IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
bool follow(const SCEV *S) {
- const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
- if (!D)
- return true;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (SC && !SC->getValue()->isZero())
- return true;
- IsUnsafe = true;
- return false;
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S) {
- SCEVFindUnsafe Search;
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
visitAll(S, Search);
return !Search.IsUnsafe;
}
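
Callers of the renamed entry point now thread ScalarEvolution through, so the walker can also reject non-affine recurrences whose step is not computable above the loop. A hedged caller-side sketch, with S and SE assumed to come from a surrounding transformation pass:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
using namespace llvm;

// Gate expansion on the updated safety check: this rejects udiv by zero or
// by a non-constant, and nested recurrences with non-dominating steps.
static bool canMaterialize(const SCEV *S, ScalarEvolution &SE) {
  return isSafeToExpand(S, SE);
}
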
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index dd2ed4f..f110616 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
case NormalizeAutodetect:
- if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
+ // Normalize this SCEV by subtracting the expression for the final step.
+ // We only allow affine AddRecs to be normalized, otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+ // However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ if (AR->isAffine() &&
+ IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
Loops.insert(L);
}
#if 0
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 64f8e96..0353295 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -88,10 +88,19 @@ unsigned TargetTransformInfo::getUserCost(const User *U) const {
return PrevTTI->getUserCost(U);
}
+bool TargetTransformInfo::hasBranchDivergence() const {
+ return PrevTTI->hasBranchDivergence();
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
+void TargetTransformInfo::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(L, UP);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return PrevTTI->isLegalAddImmediate(Imm);
}
@@ -108,6 +117,14 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
Scale);
}
+int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale) const {
+ return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale);
+}
+
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return PrevTTI->isTruncateFree(Ty1, Ty2);
}
@@ -133,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return PrevTTI->getPopcntSupport(IntTyWidthInBit);
}
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+ return PrevTTI->haveFastSqrt(Ty);
+}
+
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Imm, Ty);
}
@@ -198,8 +219,14 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return PrevTTI->getNumberOfParts(Tp);
}
-unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
- return PrevTTI->getAddressComputationCost(Tp);
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
+ bool IsComplex) const {
+ return PrevTTI->getAddressComputationCost(Tp, IsComplex);
+}
+
+unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) const {
+ return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}
namespace {
@@ -252,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
// Otherwise, the default basic cost is used.
return TCC_Basic;
- case Instruction::IntToPtr:
+ case Instruction::IntToPtr: {
+ if (!DL)
+ return TCC_Basic;
+
// An inttoptr cast is free so long as the input is a legal integer type
// which doesn't contain values outside the range of a pointer.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ unsigned OpSize = OpTy->getScalarSizeInBits();
+ if (DL->isLegalInteger(OpSize) &&
+ OpSize <= DL->getPointerTypeSizeInBits(Ty))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
+ }
+ case Instruction::PtrToInt: {
+ if (!DL)
+ return TCC_Basic;
- case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
- Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ unsigned DestSize = Ty->getScalarSizeInBits();
+ if (DL->isLegalInteger(DestSize) &&
+ DestSize >= DL->getPointerTypeSizeInBits(OpTy))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
-
+ }
case Instruction::Trunc:
// trunc to a native type is free (assuming the target has compare and
// shift-right of the same width).
@@ -411,6 +446,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
U->getOperand(0)->getType() : 0);
}
+ bool hasBranchDivergence() const { return false; }
+
bool isLoweredToCall(const Function *F) const {
// FIXME: These should almost certainly not be handled here, and instead
// handled with the help of TLI or the target itself. This was largely
@@ -442,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return true;
}
+ void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+
bool isLegalAddImmediate(int64_t Imm) const {
return false;
}
@@ -457,6 +496,15 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return !BaseGV && BaseOffset == 0 && Scale <= 1;
}
+ int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) const {
+ // Guess that all legal addressing modes are free.
+ if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
+ return 0;
+ return -1;
+ }
+
+
bool isTruncateFree(Type *Ty1, Type *Ty2) const {
return false;
}
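
The default getScalingFactorCost above encodes the convention a client would rely on: 0 means the addressing mode is believed free, and a negative value means it is not legal at all. A hedged caller-side sketch, with a hypothetical helper name:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Returns true when "base + Scale * index" addressing into AccessTy is
// believed free; not part of the patch, illustration only.
static bool scaledAddressingIsFree(const TargetTransformInfo &TTI,
                                   Type *AccessTy, int64_t Scale) {
  return TTI.getScalingFactorCost(AccessTy, /*BaseGV=*/0, /*BaseOffset=*/0,
                                  /*HasBaseReg=*/true, Scale) == 0;
}
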
@@ -481,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return PSK_Software;
}
+ bool haveFastSqrt(Type *Ty) const {
+ return false;
+ }
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
return 1;
}
@@ -542,9 +594,13 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return 0;
}
- unsigned getAddressComputationCost(Type *Tp) const {
+ unsigned getAddressComputationCost(Type *Tp, bool) const {
return 0;
}
+
+ unsigned getReductionCost(unsigned, Type *, bool) const {
+ return 1;
+ }
};
} // end anonymous namespace
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbf3c3a..6791d4b 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -16,7 +16,12 @@
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
-// The current metadata format is very simple. TBAA MDNodes have up to
+// We now support two metadata formats: scalar TBAA and struct-path aware
+// TBAA. After all test cases are upgraded to use struct-path aware TBAA
+// and we can auto-upgrade existing bc files, the support for scalar TBAA
+// can be dropped.
+//
+// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
// !0 = metadata !{ metadata !"an example type tree" }
// !1 = metadata !{ metadata !"int", metadata !0 }
@@ -40,6 +45,65 @@
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
+// With struct-path aware TBAA, the MDNodes attached to an instruction using
+// "!tbaa" are called path tag nodes.
+//
+// The path tag node has 4 fields with the last field being optional.
+//
+// The first field is the base type node, it can be a struct type node
+// or a scalar type node. The second field is the access type node, it
+// must be a scalar type node. The third field is the offset into the base type.
+// The last field has the same meaning as the last field of our scalar TBAA:
+// it's an integer which, if equal to 1, indicates that the access is "constant".
+//
+// The struct type node has a name and a list of pairs, one pair for each member
+// of the struct. The first element of each pair is a type node (a struct type
+// node or a scalar type node), specifying the type of the member; the second
+// element of each pair is the offset of the member.
+//
+// Given an example
+// typedef struct {
+// short s;
+// } A;
+// typedef struct {
+// uint16_t s;
+// A a;
+// } B;
+//
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
+// instruction. The base type is !4 (struct B), the access type is !2 (scalar
+// type short) and the offset is 4.
+//
+// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
+// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
+// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
+// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
+// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// // Struct type node
+// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+//
+// The struct type nodes and the scalar type nodes form a type DAG.
+// Root (!0)
+// char (!1) -- edge to Root
+// short (!2) -- edge to char
+// A (!3) -- edge with offset 0 to short
+// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
+//
+// To check if two tags (tagX and tagY) can alias, we start from the base type
+// of tagX, follow the edge with the correct offset in the type DAG and adjust
+// the offset until we reach the base type of tagY or until we reach the Root
+// node.
+// If we reach the base type of tagY, compare the adjusted offset with
+// offset of tagY, return Alias if the offsets are the same, return NoAlias
+// otherwise.
+// If we reach the Root node, perform the above starting from base type of tagY
+// to see if we reach base type of tagX.
+//
+// If they have different roots, they're part of different potentially
+// unrelated type systems, so we return Alias to be conservative.
+// If neither node is an ancestor of the other and they have the same root,
+// then we say NoAlias.
+//
// TODO: The current metadata format doesn't support struct
// fields. For example:
// struct X {
@@ -71,7 +135,6 @@ using namespace llvm;
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
-static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -168,8 +231,12 @@ namespace {
if (Node->getNumOperands() < 2)
return TBAAStructTypeNode();
- // Special handling for a scalar type node.
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
+ cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
@@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
+/// Check the first operand of the tbaa tag node; if it is an MDNode, we
+/// treat it as struct-path aware TBAA format, otherwise we treat it as
+/// scalar TBAA format.
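+/// For example, a scalar-format tag such as !{!"short", !1} starts with an
+/// MDString, while a struct-path tag such as !{!4, !2, i64 4} starts with
+/// an MDNode (node numbers reused from the format description above, for
+/// illustration only).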
+static bool isStructPathTBAA(const MDNode *MD) {
+ // An anonymous TBAA root starts with an MDNode, and dragonegg uses it as
+ // a TBAA tag.
+ return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
+}
+
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
- if (EnableStructPathTBAA)
+ if (isStructPathTBAA(A))
return PathAliases(A, B);
// Keep track of the root node for A and B.
@@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
Min = OnlyReadsMemory;
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+bool MDNode::isTBAAVtableAccess() const {
+ if (!isStructPathTBAA(this)) {
+ if (getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+ }
+
+ // For struct-path aware TBAA, we use the access type of the tag.
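+ // (A struct-path access tag has the form !{BaseTy, AccessTy, Offset}, so
+ // operand 1 here is the access type node whose name we examine.)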
+ if (getNumOperands() < 2) return false;
+ MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
+ if (!Tag) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+}
+
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!A || !B)
return NULL;
@@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- if (EnableStructPathTBAA) {
+ bool StructPath = isStructPathTBAA(A);
+ if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
if (!A) return 0;
B = cast_or_null<MDNode>(B->getOperand(1));
@@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!EnableStructPathTBAA)
+ if (!StructPath)
return Ret;
if (!Ret)
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 45dcc5e..e39ee62 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -39,8 +40,8 @@ const unsigned MaxDepth = 6;
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+
+ return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -290,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
if (Align > 0)
KnownZero = APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ countTrailingZeros(Align));
else
KnownZero.clearAllBits();
KnownOne.clearAllBits();
@@ -321,7 +322,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
if (Align)
- KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
return;
}
@@ -613,7 +614,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
break;
}
case Instruction::GetElementPtr: {
@@ -629,12 +630,22 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
+ if (!TD)
+ return;
+
+ // Handle the case when the index is a vector zeroinitializer.
+ Constant *CIndex = cast<Constant>(Index);
+ if (CIndex->isZeroValue())
+ continue;
+
+ if (CIndex->getType()->isVectorTy())
+ Index = CIndex->getSplatValue();
+
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
- TrailZ = std::min(TrailZ,
- CountTrailingZeros_64(Offset));
+ TrailZ = std::min<unsigned>(TrailZ,
+ countTrailingZeros(Offset));
} else {
// Handle array index arithmetic.
Type *IndexedTy = GTI.getIndexedType();
@@ -644,7 +655,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
TrailZ = std::min(TrailZ,
- unsigned(CountTrailingZeros_64(TypeSize) +
+ unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
}
}
@@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
break;
@@ -855,6 +865,37 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
return false;
}
+ // Adding a power-of-two or zero to the same power-of-two or zero yields
+ // either the original power-of-two, a larger power-of-two, or zero.
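+ // (If X == Y == 2^k, then X + Y == 2^(k+1); if either operand is zero,
+ // the sum is just the other operand. Possible wrap-around is covered by
+ // the OrZero/no-wrap check below.)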
+ if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
+ if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
+ if (match(X, m_And(m_Specific(Y), m_Value())) ||
+ match(X, m_And(m_Value(), m_Specific(Y))))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth))
+ return true;
+ if (match(Y, m_And(m_Specific(X), m_Value())) ||
+ match(Y, m_And(m_Value(), m_Specific(X))))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth))
+ return true;
+
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+ APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
+ ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth);
+
+ APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
+ ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth);
+ // If i8 V is a power of two or zero:
+ // ZeroBits: 1 1 1 0 1 1 1 1
+ // ~ZeroBits: 0 0 0 1 0 0 0 0
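+ // As a concrete (idealized) case, for X = Y = 16 both ZeroBits patterns
+ // are as above; ~(LHSZeroBits & RHSZeroBits) == 00010000 is a power of
+ // two, and X + Y == 32 is again a power of two.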
+ if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2())
+ // If OrZero isn't set, we cannot give back a zero result.
+ // Make sure either the LHS or RHS has a bit set.
+ if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue())
+ return true;
+ }
+ }
+
// An exact divide or right shift can only shift off zero bits, so the result
// is a power of two only if the first operand is a power of two and not
// copying a sign bit (sdiv int_min, 2).
@@ -1509,7 +1550,7 @@ Value *llvm::isBytewiseValue(Value *V) {
// struct. To is the result struct built so far; new insertvalue instructions
// build on that.
static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
- SmallVector<unsigned, 10> &Idxs,
+ SmallVectorImpl<unsigned> &Idxs,
unsigned IdxSkip,
Instruction *InsertBefore) {
llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
@@ -1673,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
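/// For example, for a hypothetical GEP computing a field at byte offset 8
/// from a pointer %s, this returns %s and sets Offset to 8 (illustrative
/// values only).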
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
// the widest we support.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
- if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
- break;
- ByteOffset += GEPOffset;
+ if (DL) {
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
+ break;
+
+ ByteOffset += GEPOffset;
+ }
+
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
@@ -2019,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
/// isKnownNonNull - Return true if we know that the specified value is never
/// null.
-bool llvm::isKnownNonNull(const Value *V) {
+bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2030,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) {
// Global values are not null unless extern weak.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+
+ // operator new never returns null.
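+ // (This holds for the throwing forms of operator new; the nothrow
+ // variants may return null and are not matched as operator-new-like.)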
+ if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
+ return true;
+
return false;
}