author     dim <dim@FreeBSD.org>  2015-12-30 13:13:10 +0000
committer  dim <dim@FreeBSD.org>  2015-12-30 13:13:10 +0000
commit     9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a (patch)
tree       b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Analysis
parent     f09a28d1de99fda4f5517fb12670fc36552f4927 (diff)
parent     e194cd6d03d91631334d9d5e55b506036f423cc8 (diff)
Update llvm to trunk r256633.
Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 615
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp | 173
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 54
-rw-r--r--  contrib/llvm/lib/Analysis/AliasDebugger.cpp | 136
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 44
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp | 32
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 1094
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 93
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 12
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 397
-rw-r--r--  contrib/llvm/lib/Analysis/CFG.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp | 253
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraph.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallGraph.cpp) | 42
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp) | 0
-rw-r--r--  contrib/llvm/lib/Analysis/CallPrinter.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp) | 0
-rw-r--r--  contrib/llvm/lib/Analysis/CaptureTracking.cpp | 98
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 136
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/DemandedBits.cpp | 392
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 182
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 115
-rw-r--r--  contrib/llvm/lib/Analysis/EHPersonalities.cpp | 106
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp | 1002
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp | 609
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/IPA.cpp | 30
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/InlineCost.cpp (renamed from contrib/llvm/lib/Analysis/IPA/InlineCost.cpp) | 102
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 81
-rw-r--r--  contrib/llvm/lib/Analysis/LazyCallGraph.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 345
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp | 141
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallSemantics.cpp | 89
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 303
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 24
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 551
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 69
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 139
-rw-r--r--  contrib/llvm/lib/Analysis/MemDepPrinter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp | 18
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 174
-rw-r--r--  contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp | 95
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 170
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp | 28
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp | 675
-rw-r--r--  contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp | 85
-rw-r--r--  contrib/llvm/lib/Analysis/RegionInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPrinter.cpp | 151
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 3265
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 178
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 361
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 202
-rw-r--r--  contrib/llvm/lib/Analysis/SparsePropagation.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 42
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 234
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 654
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 1164
-rw-r--r--  contrib/llvm/lib/Analysis/VectorUtils.cpp | 199
61 files changed, 9542 insertions(+), 5770 deletions(-)
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 44d137d..35f2e97 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -25,9 +25,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -40,44 +47,72 @@
#include "llvm/Pass.h"
using namespace llvm;
-// Register the AliasAnalysis interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
-char AliasAnalysis::ID = 0;
+/// Allow disabling BasicAA from the AA results. This is particularly useful
+/// when testing to isolate a single AA implementation.
+static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
+ cl::init(false));
+
+AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) {
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+}
+
+AAResults &AAResults::operator=(AAResults &&Arg) {
+ AAs = std::move(Arg.AAs);
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+ return *this;
+}
+
+AAResults::~AAResults() {
+// FIXME: It would be nice to at least clear out the pointers back to this
+// aggregation here, but we end up with non-nesting lifetimes in the legacy
+// pass manager that prevent this from working. In the legacy pass manager
+// we'll end up with dangling references here in some cases.
+#if 0
+ for (auto &AA : AAs)
+ AA->setAAResults(nullptr);
+#endif
+}
//===----------------------------------------------------------------------===//
// Default chaining methods
//===----------------------------------------------------------------------===//
-AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->alias(LocA, LocB);
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ for (const auto &AA : AAs) {
+ auto Result = AA->alias(LocA, LocB);
+ if (Result != MayAlias)
+ return Result;
+ }
+ return MayAlias;
}
-bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->pointsToConstantMemory(Loc, OrLocal);
-}
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ for (const auto &AA : AAs)
+ if (AA->pointsToConstantMemory(Loc, OrLocal))
+ return true;
-AliasAnalysis::ModRefResult
-AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getArgModRefInfo(CS, ArgIdx);
+ return false;
}
-void AliasAnalysis::deleteValue(Value *V) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->deleteValue(V);
-}
+ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
-void AliasAnalysis::addEscapingUse(Use &U) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->addEscapingUse(U);
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// We may have two calls
if (auto CS = ImmutableCallSite(I)) {
// Check if the two calls modify the same memory
@@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
- return AliasAnalysis::ModRef;
- }
- return AliasAnalysis::NoModRef;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-
- ModRefBehavior MRB = getModRefBehavior(CS);
- if (MRB == DoesNotAccessMemory)
- return NoModRef;
-
- ModRefResult Mask = ModRef;
- if (onlyReadsMemory(MRB))
- Mask = Ref;
-
- if (onlyAccessesArgPointees(MRB)) {
- bool doesAlias = false;
- ModRefResult AllArgsMask = NoModRef;
- if (doesAccessArgPointees(MRB)) {
- for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI) {
- const Value *Arg = *AI;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
- MemoryLocation ArgLoc =
- MemoryLocation::getForArgument(CS, ArgIdx, *TLI);
- if (!isNoAlias(ArgLoc, Loc)) {
- ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx);
- doesAlias = true;
- AllArgsMask = ModRefResult(AllArgsMask | ArgMask);
- }
- }
- }
- if (!doesAlias)
- return NoModRef;
- Mask = ModRefResult(Mask & AllArgsMask);
+ if (getModRefInfo(Call, DefLoc) != MRI_NoModRef)
+ return MRI_ModRef;
}
+ return MRI_NoModRef;
+}
- // If Loc is a constant memory location, the call definitely could not
- // modify the memory location.
- if ((Mask & Mod) && pointsToConstantMemory(Loc))
- Mask = ModRefResult(Mask & ~Mod);
-
- // If this is the end of the chain, don't forward.
- if (!AA) return Mask;
-
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any mask we've managed to compute.
- return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-
- // If CS1 or CS2 are readnone, they don't interact.
- ModRefBehavior CS1B = getModRefBehavior(CS1);
- if (CS1B == DoesNotAccessMemory) return NoModRef;
-
- ModRefBehavior CS2B = getModRefBehavior(CS2);
- if (CS2B == DoesNotAccessMemory) return NoModRef;
-
- // If they both only read from memory, there is no dependence.
- if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
- return NoModRef;
-
- AliasAnalysis::ModRefResult Mask = ModRef;
-
- // If CS1 only reads memory, the only dependence on CS2 can be
- // from CS1 reading memory written by CS2.
- if (onlyReadsMemory(CS1B))
- Mask = ModRefResult(Mask & Ref);
-
- // If CS2 only access memory through arguments, accumulate the mod/ref
- // information from CS1's references to the memory referenced by
- // CS2's arguments.
- if (onlyAccessesArgPointees(CS2B)) {
- AliasAnalysis::ModRefResult R = NoModRef;
- if (doesAccessArgPointees(CS2B)) {
- for (ImmutableCallSite::arg_iterator
- I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI);
-
- // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of
- // CS1 on that location is the inverse.
- ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx);
- if (ArgMask == Mod)
- ArgMask = ModRef;
- else if (ArgMask == Ref)
- ArgMask = Mod;
-
- R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask);
- if (R == Mask)
- break;
- }
- }
- return R;
- }
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ ModRefInfo Result = MRI_ModRef;
- // If CS1 only accesses memory through arguments, check if CS2 references
- // any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (onlyAccessesArgPointees(CS1B)) {
- AliasAnalysis::ModRefResult R = NoModRef;
- if (doesAccessArgPointees(CS1B)) {
- for (ImmutableCallSite::arg_iterator
- I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI);
-
- // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod
- // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1
- // might Ref, then we care only about a Mod by CS2.
- ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc);
- if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
- ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
- R = ModRefResult((R | ArgMask) & Mask);
-
- if (R == Mask)
- break;
- }
- }
- return R;
- }
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc));
- // If this is the end of the chain, don't forward.
- if (!AA) return Mask;
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any mask we've managed to compute.
- return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+ return Result;
}
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
+}
- ModRefBehavior Min = UnknownModRefBehavior;
+FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
- // Call back into the alias analysis with the other form of getModRefBehavior
- // to see if it can give a better response.
- if (const Function *F = CS.getCalledFunction())
- Min = getModRefBehavior(F);
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
- // If this is the end of the chain, don't forward.
- if (!AA) return Min;
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any result we've managed to compute.
- return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+ return Result;
}
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(const Function *F) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getModRefBehavior(F);
+FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
+
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
+
+ return Result;
}
//===----------------------------------------------------------------------===//
-// AliasAnalysis non-virtual helper method implementation
+// Helper method implementation
//===----------------------------------------------------------------------===//
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+ const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!L->isUnordered())
- return ModRef;
+ return MRI_ModRef;
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// Otherwise, a load just reads.
- return Ref;
+ return MRI_Ref;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!S->isUnordered())
- return ModRef;
+ return MRI_ModRef;
if (Loc.Ptr) {
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
if (!alias(MemoryLocation::get(S), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
if (pointsToConstantMemory(Loc))
- return NoModRef;
-
+ return MRI_NoModRef;
}
// Otherwise, a store just writes.
- return Mod;
+ return MRI_Mod;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+ const MemoryLocation &Loc) {
if (Loc.Ptr) {
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
if (!alias(MemoryLocation::get(V), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
if (pointsToConstantMemory(Loc))
- return NoModRef;
+ return MRI_NoModRef;
}
// Otherwise, a va_arg reads and writes.
- return ModRef;
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchpad.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchpad reads and writes.
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchpad.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchret reads and writes.
+ return MRI_ModRef;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX,
- const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (CX->getSuccessOrdering() > Monotonic)
- return ModRef;
+ return MRI_ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
- return NoModRef;
+ return MRI_NoModRef;
- return ModRef;
+ return MRI_ModRef;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW,
- const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (RMW->getOrdering() > Monotonic)
- return ModRef;
+ return MRI_ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
- return NoModRef;
+ return MRI_NoModRef;
- return ModRef;
+ return MRI_ModRef;
}
-// FIXME: this is really just shoring-up a deficiency in alias analysis.
-// BasicAA isn't willing to spend linear time determining whether an alloca
-// was captured before or after this particular call, while we are. However,
-// with a smarter AA in place, this test is just wasting compile time.
-AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
- const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) {
+/// \brief Return information about whether a particular call site modifies
+/// or reads the specified memory location \p MemLoc before instruction \p I
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+/// instruction-ordering queries inside the BasicBlock containing \p I.
+/// FIXME: this is really just shoring-up a deficiency in alias analysis.
+/// BasicAA isn't willing to spend linear time determining whether an alloca
+/// was captured before or after this particular call, while we are. However,
+/// with a smarter AA in place, this test is just wasting compile time.
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc,
+ DominatorTree *DT,
+ OrderedBasicBlock *OBB) {
if (!DT)
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
- const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
+ const Value *Object =
+ GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
ImmutableCallSite CS(I);
if (!CS.getInstruction() || CS.getInstruction() == Object)
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
/* StoreCaptures */ true, I, DT,
- /* include Object */ true))
- return AliasAnalysis::ModRef;
+ /* include Object */ true,
+ /* OrderedBasicBlock */ OBB))
+ return MRI_ModRef;
unsigned ArgNo = 0;
- AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
+ ModRefInfo R = MRI_NoModRef;
for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
@@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
if (CS.doesNotAccessMemory(ArgNo))
continue;
if (CS.onlyReadsMemory(ArgNo)) {
- R = AliasAnalysis::Ref;
+ R = MRI_Ref;
continue;
}
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
return R;
}
-// AliasAnalysis destructor: DO NOT move this to the header file for
-// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
-// the AliasAnalysis.o file in the current .a file, causing alias analysis
-// support to not be included in the tool correctly!
-//
-AliasAnalysis::~AliasAnalysis() {}
-
-/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
-/// AliasAnalysis interface before any other methods are called.
-///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
- DL = NewDL;
- auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &P->getAnalysis<AliasAnalysis>();
-}
-
-// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)).
-void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>(); // All AA's chain
-}
-
-/// getTypeStoreSize - Return the DataLayout store size for the given type,
-/// if known, or a conservative value otherwise.
-///
-uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
- return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize;
-}
-
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the location Loc.
///
-bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
- const MemoryLocation &Loc) {
- return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
+bool AAResults::canBasicBlockModify(const BasicBlock &BB,
+ const MemoryLocation &Loc) {
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
}
/// canInstructionRangeModRef - Return true if it is possible for the
@@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
/// mode) the location Loc. The instructions to consider are all
/// of the instructions in the range of [I1,I2] INCLUSIVE.
/// I1 and I2 must be in the same basic block.
-bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
- const Instruction &I2,
- const MemoryLocation &Loc,
- const ModRefResult Mode) {
+bool AAResults::canInstructionRangeModRef(const Instruction &I1,
+ const Instruction &I2,
+ const MemoryLocation &Loc,
+ const ModRefInfo Mode) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
- BasicBlock::const_iterator I = &I1;
- BasicBlock::const_iterator E = &I2;
+ BasicBlock::const_iterator I = I1.getIterator();
+ BasicBlock::const_iterator E = I2.getIterator();
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, Loc) & Mode)
+ if (getModRefInfo(&*I, Loc) & Mode)
return true;
return false;
}
+// Provide a definition for the root virtual destructor.
+AAResults::Concept::~Concept() {}
+
+namespace {
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+ typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+
+ CallbackT CB;
+
+ static char ID;
+
+ ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+ explicit ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+}
+
+char ExternalAAWrapperPass::ID = 0;
+INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
+ false, true)
+
+ImmutablePass *
+llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) {
+ return new ExternalAAWrapperPass(std::move(Callback));
+}
+
+AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) {
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char AAResultsWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
+INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+
+FunctionPass *llvm::createAAResultsWrapperPass() {
+ return new AAResultsWrapperPass();
+}
+
+/// Run the wrapper pass to rebuild an aggregation over known AA passes.
+///
+/// This is the legacy pass manager's interface to the new-style AA results
+/// aggregation object. Because this is somewhat shoe-horned into the legacy
+/// pass manager, we hard code all the specific alias analyses available into
+/// it. While the particular set enabled is configured via commandline flags,
+/// adding a new alias analysis to LLVM will require adding support for it to
+/// this list.
+bool AAResultsWrapperPass::runOnFunction(Function &F) {
+ // NB! This *must* be reset before adding new AA results to the new
+ // AAResults object because in the legacy pass manager, each instance
+ // of these will refer to the *same* immutable analyses, registering and
+ // unregistering themselves with them. We need to carefully tear down the
+ // previous object first, in this case replacing it with an empty one, before
+ // registering new results.
+ AAR.reset(new AAResults());
+
+ // BasicAA is always available for function analyses. Also, we add it first
+ // so that it can trump TBAA results when it proves MustAlias.
+ // FIXME: TBAA should have an explicit mode to support this and then we
+ // should reconsider the ordering here.
+ if (!DisableBasicAA)
+ AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+
+ // Populate the results with the currently available AAs.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+
+ // If available, run an external AA providing callback over the results as
+ // well.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+ if (WrapperPass->CB)
+ WrapperPass->CB(*this, F, *AAR);
+
+ // Analyses don't mutate the IR, so return false.
+ return false;
+}
+
+void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicAAWrapperPass>();
+
+ // We also need to mark all the alias analysis passes we will potentially
+ // probe in runOnFunction as used here to ensure the legacy pass manager
+ // preserves them. This hard coding of lists of alias analyses is specific to
+ // the legacy pass manager.
+ AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
+ AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
+ AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>();
+ AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
+ AU.addUsedIfAvailable<SCEVAAWrapperPass>();
+ AU.addUsedIfAvailable<CFLAAWrapperPass>();
+}
+
+AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
+ BasicAAResult &BAR) {
+ AAResults AAR;
+
+ // Add in our explicitly constructed BasicAA results.
+ if (!DisableBasicAA)
+ AAR.addAAResult(BAR);
+
+ // Populate the results with the other currently available AAs.
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+
+ return AAR;
+}
+
/// isNoAliasCall - Return true if this pointer is returned by a noalias
/// function.
bool llvm::isNoAliasCall(const Value *V) {
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- return ImmutableCallSite(cast<Instruction>(V))
- .paramHasAttr(0, Attribute::NoAlias);
+ if (auto CS = ImmutableCallSite(V))
+ return CS.paramHasAttr(0, Attribute::NoAlias);
return false;
}
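
The changes above replace AliasAnalysis's delegation chain with an AAResults aggregation: each query starts at the top of the mod/ref lattice and intersects in every registered implementation's answer, returning early once MRI_NoModRef (the bottom) is reached. Here is a minimal standalone C++ sketch of that pattern — the enum and the query list are illustrative stand-ins, not LLVM's actual types:

    #include <cstdio>
    #include <functional>
    #include <vector>

    // Stand-in mirroring llvm::ModRefInfo: bit 0 = Ref, bit 1 = Mod.
    enum ModRef { NoModRef = 0, Ref = 1, Mod = 2, ModRefBoth = 3 };

    // Intersect the answers of several analyses, early-exiting once the
    // result reaches the lattice bottom and cannot become more precise.
    static ModRef combine(const std::vector<std::function<ModRef()>> &AAs) {
      ModRef Result = ModRefBoth; // top of the lattice
      for (const auto &Query : AAs) {
        Result = ModRef(Result & Query()); // refine by intersection
        if (Result == NoModRef)            // bottom: no point asking further
          return Result;
      }
      return Result;
    }

    int main() {
      std::vector<std::function<ModRef()>> AAs = {
        [] { return ModRefBoth; }, // a conservative analysis
        [] { return Ref; },        // an analysis that proves read-only
      };
      std::printf("combined = %d\n", combine(AAs)); // prints "combined = 1" (Ref)
      return 0;
    }

Any single implementation that proves a stronger result tightens the final answer; this is also why the wrapper pass registers BasicAA first, so its MustAlias conclusions can trump TBAA's results.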
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
deleted file mode 100644
index 9b6a5a4..0000000
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass which can be used to count how many alias queries
-// are being made and how the alias analysis implementation being used responds.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<bool>
-PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
-static cl::opt<bool>
-PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
-
-namespace {
- class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
- unsigned No, May, Partial, Must;
- unsigned NoMR, JustRef, JustMod, MR;
- Module *M;
- public:
- static char ID; // Class identification, replacement for typeinfo
- AliasAnalysisCounter() : ModulePass(ID) {
- initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
- No = May = Partial = Must = 0;
- NoMR = JustRef = JustMod = MR = 0;
- }
-
- void printLine(const char *Desc, unsigned Val, unsigned Sum) {
- errs() << " " << Val << " " << Desc << " responses ("
- << Val*100/Sum << "%)\n";
- }
- ~AliasAnalysisCounter() override {
- unsigned AASum = No+May+Partial+Must;
- unsigned MRSum = NoMR+JustRef+JustMod+MR;
- if (AASum + MRSum) { // Print a report if any counted queries occurred...
- errs() << "\n===== Alias Analysis Counter Report =====\n"
- << " Analysis counted:\n"
- << " " << AASum << " Total Alias Queries Performed\n";
- if (AASum) {
- printLine("no alias", No, AASum);
- printLine("may alias", May, AASum);
- printLine("partial alias", Partial, AASum);
- printLine("must alias", Must, AASum);
- errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
- << May*100/AASum << "%/"
- << Partial*100/AASum << "%/"
- << Must*100/AASum<<"%\n\n";
- }
-
- errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
- if (MRSum) {
- printLine("no mod/ref", NoMR, MRSum);
- printLine("ref", JustRef, MRSum);
- printLine("mod", JustMod, MRSum);
- printLine("mod/ref", MR, MRSum);
- errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
- << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
- << "%/" << MR*100/MRSum <<"%\n\n";
- }
- }
- }
-
- bool runOnModule(Module &M) override {
- this->M = &M;
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
- AU.setPreservesAll();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- // FIXME: We could count these too...
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
- }
-
- // Forwarding functions: just delegate to a real AA implementation, counting
- // the number of responses...
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
- }
- };
-}
-
-char AliasAnalysisCounter::ID = 0;
-INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
- "Count Alias Analysis Query Responses", false, true, false)
-
-ModulePass *llvm::createAliasAnalysisCounterPass() {
- return new AliasAnalysisCounter();
-}
-
-AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
-
- const char *AliasString = nullptr;
- switch (R) {
- case NoAlias: No++; AliasString = "No alias"; break;
- case MayAlias: May++; AliasString = "May alias"; break;
- case PartialAlias: Partial++; AliasString = "Partial alias"; break;
- case MustAlias: Must++; AliasString = "Must alias"; break;
- }
-
- if (PrintAll || (PrintAllFailures && R == MayAlias)) {
- errs() << AliasString << ":\t";
- errs() << "[" << LocA.Size << "B] ";
- LocA.Ptr->printAsOperand(errs(), true, M);
- errs() << ", ";
- errs() << "[" << LocB.Size << "B] ";
- LocB.Ptr->printAsOperand(errs(), true, M);
- errs() << "\n";
- }
-
- return R;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
-
- const char *MRString = nullptr;
- switch (R) {
- case NoModRef: NoMR++; MRString = "NoModRef"; break;
- case Ref: JustRef++; MRString = "JustRef"; break;
- case Mod: JustMod++; MRString = "JustMod"; break;
- case ModRef: MR++; MRString = "ModRef"; break;
- }
-
- if (PrintAll || (PrintAllFailures && R == ModRef)) {
- errs() << MRString << ": Ptr: ";
- errs() << "[" << Loc.Size << "B] ";
- Loc.Ptr->printAsOperand(errs(), true, M);
- errs() << "\t<->" << *CS.getInstruction() << '\n';
- }
- return R;
-}
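
The counter pass could only interpose on queries because every AliasAnalysis used to delegate to the next implementation in the chain; with delegation gone there is no hook left, so the pass is deleted rather than ported. If similar statistics were ever wanted again, one option (purely a sketch, not part of this commit) is to tally answers at the call sites that issue the queries:

    #include <cstdio>

    // Stand-in mirroring llvm::AliasResult; not LLVM code.
    enum Alias { NoAlias, MayAlias, PartialAlias, MustAlias };

    struct AliasQueryCounter {
      unsigned Counts[4] = {0, 0, 0, 0};
      // Record one query result and hand it back unchanged.
      Alias record(Alias R) {
        ++Counts[R];
        return R;
      }
      void report() const {
        std::printf("no/may/partial/must = %u/%u/%u/%u\n", Counts[NoAlias],
                    Counts[MayAlias], Counts[PartialAlias], Counts[MustAlias]);
      }
    };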
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 5d1b001..12917b6 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -21,8 +21,10 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
@@ -57,7 +59,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesAll();
}
@@ -81,7 +83,7 @@ namespace {
char AAEval::ID = 0;
INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
"Exhaustive Alias Analysis Precision Evaluator", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AAEval, "aa-eval",
"Exhaustive Alias Analysis Precision Evaluator", false, true)
@@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) {
}
bool AAEval::runOnFunction(Function &F) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
SetVector<Value *> Pointers;
- SetVector<CallSite> CallSites;
+ SmallSetVector<CallSite, 16> CallSites;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy()) // Add all pointer arguments.
- Pointers.insert(I);
+ for (auto &I : F.args())
+ if (I.getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(&I);
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
@@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) {
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
// Consider formals.
- for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI)
- if (isInterestingPointer(*AI))
- Pointers.insert(*AI);
+ for (Use &DataOp : CS.data_ops())
+ if (isInterestingPointer(DataOp))
+ Pointers.insert(DataOp);
CallSites.insert(CS);
} else {
// Consider all operands.
@@ -188,12 +190,12 @@ bool AAEval::runOnFunction(Function &F) {
I1 != E; ++I1) {
uint64_t I1Size = MemoryLocation::UnknownSize;
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
uint64_t I2Size = MemoryLocation::UnknownSize;
Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+ if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
case NoAlias:
@@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) {
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (SetVector<CallSite>::iterator C = CallSites.begin(),
- Ce = CallSites.end(); C != Ce; ++C) {
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
Instruction *I = C->getInstruction();
for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
uint64_t Size = MemoryLocation::UnknownSize;
Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
- if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+ if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
++NoModRefCount;
break;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
++ModCount;
break;
- case AliasAnalysis::Ref:
+ case MRI_Ref:
PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
++RefCount;
break;
- case AliasAnalysis::ModRef:
+ case MRI_ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRefCount;
break;
@@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) {
}
// Mod/ref alias analysis: compare all pairs of calls
- for (SetVector<CallSite>::iterator C = CallSites.begin(),
- Ce = CallSites.end(); C != Ce; ++C) {
- for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+ for (auto D = CallSites.begin(); D != Ce; ++D) {
if (D == C)
continue;
switch (AA.getModRefInfo(*C, *D)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
++NoModRefCount;
break;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
++ModCount;
break;
- case AliasAnalysis::Ref:
+ case MRI_Ref:
PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
++RefCount;
break;
- case AliasAnalysis::ModRef:
+ case MRI_ModRef:
PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
++ModRefCount;
break;
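
The evaluator shows the new consumption idiom for legacy passes: require AAResultsWrapperPass in getAnalysisUsage and fetch the aggregation with getAAResults() in runOnFunction. A minimal consumer following the same idiom — a sketch against this revision's headers, where MyAAConsumer is a hypothetical pass name and the usual INITIALIZE_PASS registration is omitted:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct MyAAConsumer : FunctionPass {
      static char ID;
      MyAAConsumer() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // Replaces the old AU.addRequired<AliasAnalysis>().
        AU.addRequired<AAResultsWrapperPass>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // issue AA.alias()/AA.getModRefInfo() queries here
        return false;
      }
    };
    }
    char MyAAConsumer::ID = 0;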
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
deleted file mode 100644
index e5107b3..0000000
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass checks alias analysis users to ensure that if they
-// create a new value, they do not query AA without informing it of the value.
-// It acts as a shim over any other AA pass you want.
-//
-// Yes keeping track of every value in the program is expensive, but this is
-// a debugging pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include <set>
-using namespace llvm;
-
-namespace {
-
- class AliasDebugger : public ModulePass, public AliasAnalysis {
-
- //What we do is simple. Keep track of every value the AA could
- //know about, and verify that queries are one of those.
- //A query to a value that didn't exist when the AA was created
- //means someone forgot to update the AA when creating new values
-
- std::set<const Value*> Vals;
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- AliasDebugger() : ModulePass(ID) {
- initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
-
- for(Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- Vals.insert(&*I);
- for (User::const_op_iterator OI = I->op_begin(),
- OE = I->op_end(); OI != OE; ++OI)
- Vals.insert(*OI);
- }
-
- for(Module::iterator I = M.begin(),
- E = M.end(); I != E; ++I){
- Vals.insert(&*I);
- if(!I->isDeclaration()) {
- for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
- AI != AE; ++AI)
- Vals.insert(&*AI);
- for (Function::const_iterator FI = I->begin(), FE = I->end();
- FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- Vals.insert(&*BI);
- for (User::const_op_iterator OI = BI->op_begin(),
- OE = BI->op_end(); OI != OE; ++OI)
- Vals.insert(*OI);
- }
- }
-
- }
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- assert(Vals.find(LocA.Ptr) != Vals.end() &&
- "Never seen value in AA before");
- assert(Vals.find(LocB.Ptr) != Vals.end() &&
- "Never seen value in AA before");
- return AliasAnalysis::alias(LocA, LocB);
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override {
- assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::getModRefInfo(CS, Loc);
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
- }
-
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- }
-
- void deleteValue(Value *V) override {
- assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
- AliasAnalysis::deleteValue(V);
- }
-
- };
-}
-
-char AliasDebugger::ID = 0;
-INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
- "AA use debugger", false, true, false)
-
-Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
-
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 54d0f43..3094049 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (AA.getModRefInfo(UnknownInsts[i],
- MemoryLocation(Ptr, Size, AAInfo)) !=
- AliasAnalysis::NoModRef)
+ MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)
return true;
}
@@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
- if (!C1 || !C2 ||
- AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
- AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
+ if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
+ AA.getModRefInfo(C2, C1) != MRI_NoModRef)
return true;
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(
- Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(),
+ I.getAAInfo())) != MRI_NoModRef)
return true;
return false;
@@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = Cur; // Remember it.
+ FoundSet = &*Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
@@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = Cur; // Remember it.
+ FoundSet = &*Cur; // Remember it.
else if (!Cur->Forward) // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
@@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) {
AliasSet::AccessLattice Access = AliasSet::RefAccess;
bool NewPtr;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
AliasSet &AS = addPointer(LI->getOperand(0),
- AA.getTypeStoreSize(LI->getType()),
+ DL.getTypeStoreSize(LI->getType()),
AAInfo, Access, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) {
AliasSet::AccessLattice Access = AliasSet::ModAccess;
bool NewPtr;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
- AA.getTypeStoreSize(Val->getType()),
+ DL.getTypeStoreSize(Val->getType()),
AAInfo, Access, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) {
}
void AliasSetTracker::add(BasicBlock &BB) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- add(I);
+ for (auto &I : BB)
+ add(&I);
}
void AliasSetTracker::add(const AliasSetTracker &AST) {
@@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
}
bool AliasSetTracker::remove(LoadInst *LI) {
- uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI->getType());
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
@@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) {
}
bool AliasSetTracker::remove(StoreInst *SI) {
- uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
AAMDNodes AAInfo;
SI->getAAMetadata(AAInfo);
@@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) {
// dangling pointers to deleted instructions.
//
void AliasSetTracker::deleteValue(Value *PtrVal) {
- // Notify the alias analysis implementation that this value is gone.
- AA.deleteValue(PtrVal);
-
// If this is a call instruction, remove the callsite from the appropriate
// AliasSet (if present).
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
@@ -650,11 +649,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
bool runOnFunction(Function &F) override {
- Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+ auto &AAWP = getAnalysis<AAResultsWrapperPass>();
+ Tracker = new AliasSetTracker(AAWP.getAAResults());
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
@@ -668,6 +668,6 @@ namespace {
char AliasSetPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 842ff0a..9c1ac00 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -20,23 +20,23 @@ using namespace llvm;
/// initializeAnalysis - Initialize all passes linked into the Analysis library.
void llvm::initializeAnalysis(PassRegistry &Registry) {
- initializeAliasAnalysisAnalysisGroup(Registry);
- initializeAliasAnalysisCounterPass(Registry);
initializeAAEvalPass(Registry);
- initializeAliasDebuggerPass(Registry);
initializeAliasSetPrinterPass(Registry);
- initializeNoAAPass(Registry);
- initializeBasicAliasAnalysisPass(Registry);
- initializeBlockFrequencyInfoPass(Registry);
- initializeBranchProbabilityInfoPass(Registry);
+ initializeBasicAAWrapperPassPass(Registry);
+ initializeBlockFrequencyInfoWrapperPassPass(Registry);
+ initializeBranchProbabilityInfoWrapperPassPass(Registry);
+ initializeCallGraphWrapperPassPass(Registry);
+ initializeCallGraphPrinterPass(Registry);
+ initializeCallGraphViewerPass(Registry);
initializeCostModelAnalysisPass(Registry);
initializeCFGViewerPass(Registry);
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
- initializeCFLAliasAnalysisPass(Registry);
+ initializeCFLAAWrapperPassPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDemandedBitsPass(Registry);
initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
@@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializePostDomPrinterPass(Registry);
initializePostDomOnlyViewerPass(Registry);
initializePostDomOnlyPrinterPass(Registry);
+ initializeAAResultsWrapperPassPass(Registry);
+ initializeGlobalsAAWrapperPassPass(Registry);
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
initializeLazyValueInfoPass(Registry);
- initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
+ initializeObjCARCAAWrapperPassPass(Registry);
initializePostDominatorTreePass(Registry);
initializeRegionInfoPassPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
initializeRegionOnlyViewerPass(Registry);
initializeRegionOnlyPrinterPass(Registry);
- initializeScalarEvolutionPass(Registry);
- initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeSCEVAAWrapperPassPass(Registry);
+ initializeScalarEvolutionWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
- initializeTypeBasedAliasAnalysisPass(Registry);
- initializeScopedNoAliasAAPass(Registry);
+ initializeTypeBasedAAWrapperPassPass(Registry);
+ initializeScopedNoAliasAAWrapperPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
initializeAnalysis(*unwrap(R));
}
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
char **OutMessages) {
raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
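A small usage sketch of the C-API shim added in this hunk: with the former IPA library folded into Analysis, LLVMInitializeIPA now simply forwards to the same initializer, so existing C-API clients keep linking and behaving as before. (Hypothetical driver; both entry points are declared in llvm-c/Initialization.h.)

#include "llvm-c/Core.h"
#include "llvm-c/Initialization.h"

int main() {
  LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
  LLVMInitializeAnalysis(R); // registers every analysis pass
  LLVMInitializeIPA(R);      // compatibility alias: performs the same registration
  return 0;
}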
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3586354..00f346e 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,24 +13,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -42,6 +39,18 @@
#include <algorithm>
using namespace llvm;
+/// Enable analysis of recursive PHI nodes.
+static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
+ cl::init(false));
+
+/// SearchLimitReached / SearchTimes shows how often the limit to decompose
+/// GEPs is reached. It will affect the precision of basic alias analysis.
+#define DEBUG_TYPE "basicaa"
+STATISTIC(SearchLimitReached, "Number of times the limit to "
+ "decompose GEPs is reached");
+STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
+
/// Cutoff after which to stop analysing a set of phi nodes potentially involved
/// in a cycle. Because we are analysing 'through' phi nodes we need to be
/// careful with value equivalence. We use reachability to make sure a value
@@ -57,8 +66,8 @@ static const unsigned MaxLookupSearchDepth = 6;
// Useful predicates
//===----------------------------------------------------------------------===//
-/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
-/// object that never escapes from the function.
+/// Returns true if the pointer is to a function-local object that never
+/// escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
// If this is a local allocation, check to see if it escapes.
if (isa<AllocaInst>(V) || isNoAliasCall(V))
@@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) {
return false;
}
-/// isEscapeSource - Return true if the pointer is one which would have
-/// been considered an escape by isNonEscapingLocalObject.
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
return true;
@@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) {
return false;
}
-/// getObjectSize - Return the size of the object specified by V, or
-/// UnknownSize if unknown.
+/// Returns the size of the object specified by V, or UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
@@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
return MemoryLocation::UnknownSize;
}
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
+/// Returns true if we can prove that the object specified by V is smaller than
+/// Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const DataLayout &DL,
const TargetLibraryInfo &TLI) {
@@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
-/// isObjectSize - Return true if we can prove that the object specified
-/// by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size,
- const DataLayout &DL, const TargetLibraryInfo &TLI) {
+/// Returns true if we can prove that the object specified by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
uint64_t ObjectSize = getObjectSize(V, DL, TLI);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
}
@@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size,
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
-namespace {
- enum ExtensionKind {
- EK_NotExtended,
- EK_SignExt,
- EK_ZeroExt
- };
-
- struct VariableGEPIndex {
- const Value *V;
- ExtensionKind Extension;
- int64_t Scale;
-
- bool operator==(const VariableGEPIndex &Other) const {
- return V == Other.V && Extension == Other.Extension &&
- Scale == Other.Scale;
- }
-
- bool operator!=(const VariableGEPIndex &Other) const {
- return !operator==(Other);
- }
- };
-}
-
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*, and return whether we looked
-/// through any sign or zero extends. The incoming Value is known to have
-/// IntegerType and it may already be sign or zero extended.
+/// Analyzes the specified value as a linear expression: "A*V + B", where A and
+/// B are constant integers.
+///
+/// Returns the scale and offset values as APInts, returns V as a Value*, and
+/// reports whether we looked through any sign or zero extends. The incoming
+/// Value is known to have IntegerType and it may already be sign or zero
+/// extended.
///
/// Note that this looks through extends, so the high bits may not be
/// represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
- ExtensionKind &Extension,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, DominatorTree *DT) {
+/*static*/ const Value *BasicAAResult::GetLinearExpression(
+ const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits,
+ unsigned &SExtBits, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
@@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
+ // If it's a constant, just convert it to an offset and remove the variable.
+ // If we've been called recursively, the Offset bit width will be greater
+ // than the constant's (the Offset's always as wide as the outermost call),
+ // so we'll zext here and process any extension in the isa<SExtInst> &
+ // isa<ZExtInst> cases below.
+ Offset += Const->getValue().zextOrSelf(Offset.getBitWidth());
+ assert(Scale == 0 && "Constant values don't have a scale");
+ return V;
+ }
+
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+
+ // If we've been called recursively then Offset and Scale will be wider
+ // than the BOp operands. We'll always zext it here as we'll process sign
+ // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases).
+ APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth());
+
switch (BOp->getOpcode()) {
- default: break;
+ default:
+ // We don't understand this instruction, so we can't decompose it any
+ // further.
+ Scale = 1;
+ Offset = 0;
+ return V;
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
- BOp, DT))
- break;
- // FALL THROUGH.
+ BOp, DT)) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+ // FALL THROUGH.
case Instruction::Add:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset += RHSC->getValue();
- return V;
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset += RHS;
+ break;
+ case Instruction::Sub:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset -= RHS;
+ break;
case Instruction::Mul:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset *= RHSC->getValue();
- Scale *= RHSC->getValue();
- return V;
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset *= RHS;
+ Scale *= RHS;
+ break;
case Instruction::Shl:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset <<= RHSC->getValue().getLimitedValue();
- Scale <<= RHSC->getValue().getLimitedValue();
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset <<= RHS.getLimitedValue();
+ Scale <<= RHS.getLimitedValue();
+ // The semantics of nsw and nuw for left shifts don't match those of
+ // multiplications, so we won't propagate them.
+ NSW = NUW = false;
return V;
}
+
+ if (isa<OverflowingBinaryOperator>(BOp)) {
+ NUW &= BOp->hasNoUnsignedWrap();
+ NSW &= BOp->hasNoSignedWrap();
+ }
+ return V;
}
}
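As a plain-arithmetic sketch of what the Add/Sub/Mul/Shl cases above compute (hypothetical helpers on int64_t standing in for the APInt logic), every step keeps the expression in Scale*V + Offset form:

#include <cassert>
#include <cstdint>

struct Linear { int64_t Scale, Offset; };  // models Scale*V + Offset

Linear addConst(Linear L, int64_t C) { return {L.Scale, L.Offset + C}; }        // (aV+b)+c
Linear mulConst(Linear L, int64_t C) { return {L.Scale * C, L.Offset * C}; }    // (aV+b)*c
Linear shlConst(Linear L, unsigned C) { return {L.Scale << C, L.Offset << C}; } // (aV+b)<<c

int main() {
  Linear V{1, 0};                         // the bare index variable
  Linear E = shlConst(addConst(V, 4), 1); // (V + 4) << 1
  assert(E.Scale == 2 && E.Offset == 8);  // i.e. 2*V + 8
}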
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
- if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
- (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
Value *CastOp = cast<CastInst>(V)->getOperand(0);
- unsigned OldWidth = Scale.getBitWidth();
+ unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();
unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
- Scale = Scale.trunc(SmallWidth);
- Offset = Offset.trunc(SmallWidth);
- Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
-
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
- Depth + 1, AC, DT);
- Scale = Scale.zext(OldWidth);
- Offset = Offset.zext(OldWidth);
+ unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits;
+ const Value *Result =
+ GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
+ Depth + 1, AC, DT, NSW, NUW);
+
+ // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
+ // by just incrementing the number of bits we've extended by.
+ unsigned ExtendedBy = NewWidth - SmallWidth;
+
+ if (isa<SExtInst>(V) && ZExtBits == 0) {
+ // sext(sext(%x, a), b) == sext(%x, a + b)
+
+ if (NSW) {
+ // We haven't sign-wrapped, so it's valid to decompose sext(%x + c)
+ // into sext(%x) + sext(c). We'll sext the Offset ourselves:
+ unsigned OldWidth = Offset.getBitWidth();
+ Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth);
+ } else {
+ // We may have signed-wrapped, so don't decompose sext(%x + c) into
+ // sext(%x) + sext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ SExtBits += ExtendedBy;
+ } else {
+ // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b)
+
+ if (!NUW) {
+ // We may have unsigned-wrapped, so don't decompose zext(%x + c) into
+ // zext(%x) + zext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ ZExtBits += ExtendedBy;
+ }
return Result;
}
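The NSW/NUW bookkeeping above matters because distributing an extension over a wrapped addition changes the value. A tiny sketch with 8-bit arithmetic (assuming the usual two's-complement wrap on narrowing conversions):

#include <cassert>
#include <cstdint>

int main() {
  int8_t X = 127, C = 1;
  int8_t Sum = (int8_t)(X + C);            // wraps to -128: the add was not nsw
  int16_t Folded = (int16_t)Sum;           // sext(X + C)       == -128
  int16_t Split = (int16_t)X + (int16_t)C; // sext(X) + sext(C) ==  128
  assert(Folded == -128 && Split == 128);  // splitting is only valid under nsw
}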
@@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
+/// If V is a symbolic pointer expression, decompose it into a base pointer
+/// with a constant offset and a number of scaled symbolic offsets.
///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale
+/// in the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As
+/// such, the gep cannot necessarily be reconstructed from its decomposed form.
///
/// When DataLayout is around, this function is capable of analyzing everything
/// that GetUnderlyingObject can look through. To be able to do that
/// GetUnderlyingObject and DecomposeGEPExpression must use the same search
-/// depth (MaxLookupSearchDepth).
-/// When DataLayout not is around, it just looks through pointer casts.
-///
-static const Value *
-DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
- SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout &DL,
- AssumptionCache *AC, DominatorTree *DT) {
+/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks
+/// through pointer casts.
+/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
+ const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
+ const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
MaxLookupReached = false;
+ SearchTimes++;
BaseOffs = 0;
do {
@@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// updated when GetUnderlyingObject is updated). TLI should be
// provided also.
if (const Value *Simplified =
- SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
+ SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
V = Simplified;
continue;
}
@@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
- for (User::const_op_iterator I = GEPOp->op_begin()+1,
- E = GEPOp->op_end(); I != E; ++I) {
- Value *Index = *I;
+ for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
+ I != E; ++I) {
+ const Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- if (FieldNo == 0) continue;
+ if (FieldNo == 0)
+ continue;
BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
- if (CIdx->isZero()) continue;
+ if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero())
+ continue;
BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
continue;
}
uint64_t Scale = DL.getTypeAllocSize(*GTI);
- ExtensionKind Extension = EK_NotExtended;
+ unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- if (DL.getPointerSizeInBits(AS) > Width)
- Extension = EK_SignExt;
+ unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
- 0, AC, DT);
+ bool NSW = true, NUW = true;
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
+ SExtBits, DL, 0, AC, DT, NSW, NUW);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- BaseOffs += IndexOffset.getSExtValue()*Scale;
+ BaseOffs += IndexOffset.getSExtValue() * Scale;
Scale *= IndexScale.getSExtValue();
// If we already had an occurrence of this index variable, merge this
@@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
- if (VarIndices[i].V == Index &&
- VarIndices[i].Extension == Extension) {
+ if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
+ VarIndices[i].SExtBits == SExtBits) {
Scale += VarIndices[i].Scale;
- VarIndices.erase(VarIndices.begin()+i);
+ VarIndices.erase(VarIndices.begin() + i);
break;
}
}
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
+ if (unsigned ShiftBits = 64 - PointerSize) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
if (Scale) {
- VariableGEPIndex Entry = {Index, Extension,
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
@@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If the chain of expressions is too deep, just return early.
MaxLookupReached = true;
+ SearchLimitReached++;
return V;
}
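A worked instance of the decomposition (hypothetical values): for a GEP equivalent to &A[i][1], where A has rows of five 32-bit ints, the loop above records BaseOffs = 4 plus a single variable index {V = i, ZExtBits = 0, SExtBits = 0, Scale = 20}:

#include <cassert>
#include <cstdint>

int main() {
  // &A[i][1] with int A[N][5]: 20 bytes per row, 4 bytes into the row.
  int64_t I = 7;
  int64_t Scale = 20, BaseOffs = 4;  // what DecomposeGEPExpression records
  int64_t ByteOffset = Scale * I + BaseOffs;
  assert(ByteOffset == 144);         // &A[7][1] lies 144 bytes past &A[0][0]
}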
-//===----------------------------------------------------------------------===//
-// BasicAliasAnalysis Pass
-//===----------------------------------------------------------------------===//
-
-#ifndef NDEBUG
-static const Function *getParent(const Value *V) {
- if (const Instruction *inst = dyn_cast<Instruction>(V))
- return inst->getParent()->getParent();
-
- if (const Argument *arg = dyn_cast<Argument>(V))
- return arg->getParent();
-
- return nullptr;
-}
-
-static bool notDifferentParent(const Value *O1, const Value *O2) {
-
- const Function *F1 = getParent(O1);
- const Function *F2 = getParent(O2);
-
- return !F1 || !F2 || F1 == F2;
-}
-#endif
-
-namespace {
- /// BasicAliasAnalysis - This is the primary alias analysis implementation.
- struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
- static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : ImmutablePass(ID) {
- initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- assert(AliasCache.empty() && "AliasCache must be cleared after use!");
- assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
- "BasicAliasAnalysis doesn't support interprocedural queries.");
- AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
- LocB.Ptr, LocB.Size, LocB.AATags);
- // AliasCache rarely has more than 1 or 2 elements, always use
- // shrink_and_clear so it quickly returns to the inline capacity of the
- // SmallDenseMap if it ever grows larger.
- // FIXME: This should really be shrink_to_inline_capacity_and_clear().
- AliasCache.shrink_and_clear();
- VisitedPhiBBs.clear();
- return Alias;
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
-
- /// pointsToConstantMemory - Chase pointers until we find a (constant
- /// global) or not.
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
-
- /// Get the location associated with a pointer argument of a callsite.
- ModRefResult getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) override;
-
- /// getModRefBehavior - Return the behavior when calling the given
- /// call site.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-
- /// getModRefBehavior - Return the behavior when calling the given function.
- /// For use when the call site is not known.
- ModRefBehavior getModRefBehavior(const Function *F) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- // AliasCache - Track alias queries to guard against recursion.
- typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
- typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
- AliasCacheTy AliasCache;
-
- /// \brief Track phi nodes we have visited. When interpret "Value" pointer
- /// equality as value equality we need to make sure that the "Value" is not
- /// part of a cycle. Otherwise, two uses could come from different
- /// "iterations" of a cycle and see different values for the same "Value"
- /// pointer.
- /// The following example shows the problem:
- /// %p = phi(%alloca1, %addr2)
- /// %l = load %ptr
- /// %addr1 = gep, %alloca2, 0, %l
- /// %addr2 = gep %alloca2, 0, (%l + 1)
- /// alias(%p, %addr1) -> MayAlias !
- /// store %l, ...
- SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
-
- // Visited - Track instructions visited by pointsToConstantMemory.
- SmallPtrSet<const Value*, 16> Visited;
-
- /// \brief Check whether two Values can be considered equivalent.
- ///
- /// In addition to pointer equivalence of \p V1 and \p V2 this checks
- /// whether they can not be part of a cycle in the value graph by looking at
- /// all visited phi nodes an making sure that the phis cannot reach the
- /// value. We have to do this because we are looking through phi nodes (That
- /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
- bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
-
- /// \brief Dest and Src are the variable indices from two decomposed
- /// GetElementPtr instructions GEP1 and GEP2 which have common base
- /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
- /// difference between the two pointers.
- void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src);
-
- // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
- // instruction against another.
- AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
- const AAMDNodes &V1AAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2);
-
- // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
- // instruction against another.
- AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
- const AAMDNodes &PNAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo);
-
- /// aliasSelect - Disambiguate a Select instruction against another value.
- AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo);
-
- AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
- AAMDNodes V1AATag,
- const Value *V2, uint64_t V2Size,
- AAMDNodes V2AATag);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
- "Basic Alias Analysis (stateless AA impl)",
- false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
- "Basic Alias Analysis (stateless AA impl)",
- false, true, false)
-
-
-ImmutablePass *llvm::createBasicAliasAnalysisPass() {
- return new BasicAliasAnalysis();
-}
-
-/// pointsToConstantMemory - Returns whether the given pointer value
-/// points to memory that is local to the function, with global constants being
-/// considered local to all functions.
-bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+/// Returns whether the given pointer value points to memory that is local to
+/// the function, with global constants being considered local to all
+/// functions.
+bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
if (!Visited.insert(V).second) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
// An alloca instruction defines local memory.
@@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant()) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
continue;
}
@@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
for (Value *IncValue : PN->incoming_values())
Worklist.push_back(IncValue);
@@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// Otherwise be conservative.
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
} while (!Worklist.empty() && --MaxLookup);
@@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS,
return false;
}
-/// getModRefBehavior - Return the behavior when calling the given call site.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+/// Returns the behavior when calling the given call site.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (CS.doesNotAccessMemory())
// Can't do better than this.
- return DoesNotAccessMemory;
+ return FMRB_DoesNotAccessMemory;
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If the callsite knows it only reads memory, don't return worse
// than that.
if (CS.onlyReadsMemory())
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
if (CS.onlyAccessesArgMemory())
- Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- // The AliasAnalysis base class has some smarts, lets use them.
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ // The AAResultBase base class has some smarts, let's use them.
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
}
-/// getModRefBehavior - Return the behavior when calling the given function.
-/// For use when the call site is not known.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+/// Returns the behavior when calling the given function. For use when the call
+/// site is not known.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
// If the function declares it doesn't access memory, we can't do better.
if (F->doesNotAccessMemory())
- return DoesNotAccessMemory;
-
- // For intrinsics, we can check the table.
- if (Intrinsic::ID iid = F->getIntrinsicID()) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/IR/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
- }
+ return FMRB_DoesNotAccessMemory;
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If the function declares it only reads memory, go with that.
if (F->onlyReadsMemory())
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
if (F->onlyAccessesArgMemory())
- Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
- Min = OnlyAccessesArgumentPointees;
+ Min = FMRB_OnlyAccessesArgumentPointees;
// Otherwise be conservative.
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
switch (II->getIntrinsicID()) {
default:
@@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
- return ArgIdx ? Ref : Mod;
+ return ArgIdx ? MRI_Ref : MRI_Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -733,40 +609,82 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
if (CS.getCalledFunction() &&
- isMemsetPattern16(CS.getCalledFunction(), *TLI)) {
+ isMemsetPattern16(CS.getCalledFunction(), TLI)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
- return ArgIdx ? Ref : Mod;
+ return ArgIdx ? MRI_Ref : MRI_Mod;
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return AliasAnalysis::getArgModRefInfo(CS, ArgIdx);
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+ return MRI_Ref;
+
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+ return MRI_NoModRef;
+
+ return AAResultBase::getArgModRefInfo(CS, ArgIdx);
}
static bool isAssumeIntrinsic(ImmutableCallSite CS) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II && II->getIntrinsicID() == Intrinsic::assume)
- return true;
+ return II && II->getIntrinsicID() == Intrinsic::assume;
+}
- return false;
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return nullptr;
}
-bool BasicAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+
+ // If we have a directly cached entry for these locations, we have recursed
+ // through this once, so just return the cached results. Notably, when this
+ // happens, we don't clear the cache.
+ auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
+ if (CacheIt != AliasCache.end())
+ return CacheIt->second;
+
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
+ LocB.Size, LocB.AATags);
+ // AliasCache rarely has more than 1 or 2 elements, always use
+ // shrink_and_clear so it quickly returns to the inline capacity of the
+ // SmallDenseMap if it ever grows larger.
+ // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+ AliasCache.shrink_and_clear();
+ VisitedPhiBBs.clear();
+ return Alias;
}
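This cache lookup pairs with the seeding done in aliasCheck below: a conservative placeholder is inserted before recursing, so a re-entrant query on the same pair of locations terminates with that placeholder instead of looping. A minimal sketch of the idiom (toy integer keys, 0 standing in for MayAlias):

#include <cassert>
#include <map>

static std::map<int, int> Cache;

int query(int Key, int Depth) {
  auto It = Cache.find(Key);
  if (It != Cache.end())
    return It->second;    // a re-entrant query hits the seeded value
  Cache[Key] = 0;         // seed the conservative answer first
  int Result = Depth < 3 ? query(Key, Depth + 1) : 1; // may recurse on Key
  return Cache[Key] = Result;
}

int main() { assert(query(42, 0) == 0); }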
-/// getModRefInfo - Check to see if the specified callsite can clobber the
-/// specified memory object. Since we only look at local properties of this
-/// function, we really can't say much about this query. We do, however, use
-/// simple "address taken" analysis on local objects.
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
+/// Checks to see if the specified callsite can clobber the specified memory
+/// object.
+///
+/// Since we only look at local properties of this function, we really can't
+/// say much about this query. We do, however, use simple "address taken"
+/// analysis on local objects.
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
@@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (isa<AllocaInst>(Object))
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
@@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) {
+ AliasResult AR =
+ getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+ if (AR) {
PassedAsArg = true;
break;
}
}
if (!PassedAsArg)
- return NoModRef;
+ return MRI_NoModRef;
}
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
if (isAssumeIntrinsic(CS))
- return NoModRef;
+ return MRI_NoModRef;
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ // The AAResultBase base class has some smarts, let's use them.
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
- return NoModRef;
+ return MRI_NoModRef;
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ // The AAResultBase base class has some smarts, let's use them.
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
-/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
-/// operators, both having the exact same pointer operand.
+/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
+/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
uint64_t V1Size,
const GEPOperator *GEP2,
@@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
ConstantInt *C2 =
dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
- // If the last (struct) indices aren't constants, we can't say anything.
- // If they're identical, the other indices might be also be dynamically
- // equal, so the GEPs can alias.
- if (!C1 || !C2 || C1 == C2)
+ // If the last (struct) indices are constants and are equal, the other
+ // indices might also be dynamically equal, so the GEPs can alias.
+ if (C1 && C2 && C1 == C2)
return MayAlias;
// Find the last-indexed type of the GEP, i.e., the type you'd get if
@@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
IntermediateIndices.push_back(GEP1->getOperand(i + 1));
}
- StructType *LastIndexedStruct =
- dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
- GEP1->getSourceElementType(), IntermediateIndices));
+ auto *Ty = GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices);
+ StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
+
+ if (isa<SequentialType>(Ty)) {
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a sequential
+ // type (array or pointer);
+ // - both GEPs only index through arrays prior to that.
+ //
+ // Because array indices greater than the number of elements are valid in
+ // GEPs, unless we know the intermediate indices are identical between
+ // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
+ // partially overlap. We also need to check that the loaded size matches
+ // the element size, otherwise we could still have overlap.
+ const uint64_t ElementSize =
+ DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
+ if (V1Size != ElementSize || V2Size != ElementSize)
+ return MayAlias;
+
+ for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
+ if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
+ return MayAlias;
- if (!LastIndexedStruct)
+ // Now we know that the array/pointer that GEP1 indexes into and the one
+ // that GEP2 indexes into must either precisely overlap or be disjoint.
+ // Because they cannot partially overlap and because elements of an array
+ // cannot overlap, if we can prove the final indices are different between
+ // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
+
+ // If the last indices are constants, we've already checked they don't
+ // equal each other so we can exit early.
+ if (C1 && C2)
+ return NoAlias;
+ if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
+ GEP2->getOperand(GEP2->getNumOperands() - 1),
+ DL))
+ return NoAlias;
+ return MayAlias;
+ } else if (!LastIndexedStruct || !C1 || !C2) {
return MayAlias;
+ }
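A plain C++ analogue of why the element-size check in this new branch matters: element-sized accesses at provably different final indices are disjoint, while an access wider than one element spans neighbours, which is exactly the case the code above refuses to call NoAlias:

#include <cassert>
#include <cstring>

int main() {
  int A[8] = {};
  assert((void *)&A[2] != (void *)&A[3]); // element-sized (4-byte) accesses: disjoint
  long long Wide;                         // but an 8-byte access at &A[2]...
  std::memcpy(&Wide, &A[2], sizeof Wide); // ...covers both A[2] and A[3]
  (void)Wide;
  return 0;
}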
// We know that:
// - both GEPs begin indexing from the exact same pointer;
@@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
return MayAlias;
}
-/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
-/// against another pointer. We know that V1 is a GEP, but we don't know
-/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
-/// UnderlyingV2 is the same for V2.
+/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
+/// another pointer.
///
-AliasResult BasicAliasAnalysis::aliasGEP(
- const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo,
- const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2) {
+/// We know that V1 is a GEP, but we don't know anything about V2.
+/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// V2.
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const AAMDNodes &V1AAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
int64_t GEP1BaseOffset;
bool GEP1MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
- // We have to get two AssumptionCaches here because GEP1 and V2 may be from
- // different functions.
- // FIXME: This really doesn't make any sense. We get a dominator tree below
- // that can only refer to a single function. But this function (aliasGEP) is
- // a method on an immutable pass that can be called when there *isn't*
- // a single function. The old pass management layer makes this "work", but
- // this isn't really a clean solution.
- AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
- AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
- if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
- AC1 = &ACT.getAssumptionCache(
- const_cast<Function &>(*GEP1I->getParent()->getParent()));
- if (auto *I2 = dyn_cast<Instruction>(V2))
- AC2 = &ACT.getAssumptionCache(
- const_cast<Function &>(*I2->getParent()->getParent()));
-
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
@@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP(
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
- AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
- V1AAInfo, UnderlyingV2,
- V2Size, V2AAInfo);
+ AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
+ UnderlyingV2, V2Size, V2AAInfo);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP(
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, *DL, AC2, DT);
+ GEP2MaxLookupReached, DL, &AC, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an
+ // assert.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If the max search depth is reached the result is undefined
@@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP(
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
- if (BaseAlias != MustAlias) return BaseAlias;
+ if (BaseAlias != MustAlias)
+ return BaseAlias;
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, *DL, AC2, DT);
+ GEP2MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If we know the two GEPs are based off of the exact same pointer (and not
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
- if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
- AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+ if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
if (R != MayAlias)
return R;
@@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
if (GEP1BasePtr != UnderlyingV1) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If the max search depth is reached the result is undefined
@@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP(
}
}
- // Try to distinguish something like &A[i][1] against &A[42][0].
- // Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
- for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
- Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+ bool AllPositive = true;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales. We
+ // don't need std::abs here (even if the scale's negative) as we'll
+ // be ^'ing Modulo with itself later.
+ Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+
+ if (AllPositive) {
+ // If the Value could change between cycles, then any reasoning about
+ // the Value this cycle may not hold in the next cycle. We'll just
+ // give up if we can't determine conditions that hold for every cycle:
+ const Value *V = GEP1VariableIndices[i].V;
+
+ bool SignKnownZero, SignKnownOne;
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ 0, &AC, nullptr, DT);
+
+ // Zero-extension widens the variable, and so forces the sign
+ // bit to zero.
+ bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+ SignKnownZero |= IsZExt;
+ SignKnownOne &= !IsZExt;
+
+ // If the variable's sign bit is known to be zero then we know it's
+ // positive, regardless of whether the value is signed or unsigned.
+ int64_t Scale = GEP1VariableIndices[i].Scale;
+ AllPositive =
+ (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ }
+ }
+
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
@@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP(
V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
V1Size <= Modulo - ModOffset)
return NoAlias;
+
+ // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+ // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+ // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+ if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ return NoAlias;
+
+ if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+ GEP1BaseOffset, &AC, DT))
+ return NoAlias;
}
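A worked instance of the modulus test with hypothetical numbers: when every variable scale is a multiple of 8 and the constant parts differ by 4, the two addresses always differ by 8k + 4, so a pair of 4-byte accesses can never overlap:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Modulo = 8;             // least significant bit set in all scales
  int64_t GEP1BaseOffset = 4;      // constant byte difference between the GEPs
  uint64_t V1Size = 4, V2Size = 4; // access sizes in bytes
  uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1); // == 4
  bool NoAlias = ModOffset >= V2Size && V1Size <= Modulo - ModOffset;
  assert(NoAlias);                 // x and x + 8k + 4 cannot overlap by 4 bytes
}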
// Statically, we can see that the base objects are the same, but the
@@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
return MayAlias;
}
-/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
-/// instruction against another.
-AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI,
- uint64_t SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
+/// against another.
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const AAMDNodes &SIAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
- AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
- SI2->getTrueValue(), V2Size, V2AAInfo);
+ AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+ SI2->getTrueValue(), V2Size, V2AAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
- SI2->getFalseValue(), V2Size, V2AAInfo);
+ aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+ SI2->getFalseValue(), V2Size, V2AAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
-// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
-// against another.
-AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
- const AAMDNodes &PNAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate a PHI instruction against
+/// another.
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const AAMDNodes &PNAAInfo, const Value *V2,
+ uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
- PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2AAInfo);
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2AAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
return Alias;
}
- SmallPtrSet<Value*, 4> UniqueSrc;
- SmallVector<Value*, 4> V1Srcs;
+ SmallPtrSet<Value *, 4> UniqueSrc;
+ SmallVector<Value *, 4> V1Srcs;
+ bool isRecursive = false;
for (Value *PV1 : PN->incoming_values()) {
if (isa<PHINode>(PV1))
// If any of the sources is itself a PHI, return MayAlias conservatively
@@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// sides are PHI nodes, in which case this is O(m x n) time where 'm'
// and 'n' are the number of PHI sources.
return MayAlias;
+
+ if (EnableRecPhiAnalysis)
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
+ }
+
if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
- V1Srcs[0], PNSize, PNAAInfo);
+ // If this PHI node is recursive, set the size of the accessed memory to
+ // unknown to represent all the possible values the GEP could advance the
+ // pointer to.
+ if (isRecursive)
+ PNSize = MemoryLocation::UnknownSize;
+
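The pattern being special-cased is the classic strided loop pointer, where in IR the pointer is a PHI of its own GEP; a C++ analogue (hypothetical function) is below. Widening PNSize to UnknownSize accounts for every element such a pointer may visit:

#include <cassert>

int sum(const int *Base, int N) {
  int S = 0;
  for (const int *P = Base; P != Base + N; ++P) // P is phi(Base, P+1) in IR
    S += *P;                                    // its footprint is the whole array
  return S;
}

int main() {
  int A[4] = {1, 2, 3, 4};
  assert(sum(A, 4) == 10);
}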
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
- V, PNSize, PNAAInfo);
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
return Alias;
}
-// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
-// such as array references.
-//
-AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
- AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size,
- AAMDNodes V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
+/// array references.
+AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+ AAMDNodes V1AAInfo, const Value *V2,
+ uint64_t V2Size, AAMDNodes V2AAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return MustAlias;
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
- return NoAlias; // Scalars cannot alias each other
+ return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
+ const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
- if (DL)
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
- (V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
- return NoAlias;
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O1, V2Size, DL, TLI)))
+ return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
- AliasCache.insert(std::make_pair(Locs, MayAlias));
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
if (!Pair.second)
return Pair.first->second;
@@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
- AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
- AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
  // If both pointers are pointing into the same object and one of them
  // is accessing the entire object, then the accesses must overlap in some
  // way.
- if (DL && O1 == O2)
+ if (O1 == O2)
if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSize(O1, V1Size, *DL, *TLI)) ||
+ isObjectSize(O1, V1Size, DL, TLI)) ||
(V2Size != MemoryLocation::UnknownSize &&
- isObjectSize(O2, V2Size, *DL, *TLI)))
+ isObjectSize(O2, V2Size, DL, TLI)))
return AliasCache[Locs] = PartialAlias;
- AliasResult Result =
- AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ // Recurse back into the best AA results we have, potentially with refined
+ // memory locations. We have already ensured that BasicAA has a MayAlias
+ // cache result for these, so any recursion back into BasicAA won't loop.
+ AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
return AliasCache[Locs] = Result;
}
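
The cache seeding above is the termination argument for the whole query: a conservative MayAlias entry is inserted before any recursive work, so a query that reaches itself again gets the cached conservative answer instead of looping. A minimal sketch of the idiom, with simplified stand-in types rather than LLVM's classes:

#include <map>
#include <utility>

enum SketchAliasResult { SketchNoAlias, SketchMayAlias, SketchMustAlias };
using LocPair = std::pair<const void *, const void *>;

static std::map<LocPair, SketchAliasResult> SketchCache;

SketchAliasResult aliasSketch(const void *V1, const void *V2) {
  LocPair Locs(V1, V2);
  // Seed the cache with the conservative answer before doing any work; a
  // recursive query for the same pair terminates on this entry.
  auto Ins = SketchCache.insert({Locs, SketchMayAlias});
  if (!Ins.second)
    return Ins.first->second;
  SketchAliasResult Result = SketchMayAlias; // a real analysis refines this
  return SketchCache[Locs] = Result;
}
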
-bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
- const Value *V2) {
+/// Check whether two Values can be considered equivalent.
+///
+/// In addition to pointer equivalence of \p V1 and \p V2, this checks
+/// whether they cannot be part of a cycle in the value graph by looking at
+/// all visited phi nodes and making sure that the phis cannot reach the
+/// value. We have to do this because we are looking through phi nodes (that
+/// is, we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
+ const Value *V2) {
if (V != V2)
return false;
@@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
return false;
- // Use dominance or loop info if available.
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
-
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
for (auto *P : VisitedPhiBBs)
- if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
+ if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
return false;
return true;
}
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-void BasicAliasAnalysis::GetIndexDifference(
+/// Computes the symbolic difference between two decomposed GEPs.
+///
+/// Dest and Src are the variable indices from two decomposed GetElementPtr
+/// instructions GEP1 and GEP2 which have common base pointers.
+void BasicAAResult::GetIndexDifference(
SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty())
@@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference(
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
- ExtensionKind Extension = Src[i].Extension;
+ unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
int64_t Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
- Dest[j].Extension != Extension)
+ Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
continue;
// If we found it, subtract off Scale V's from the entry in Dest. If it
@@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference(
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
- VariableGEPIndex Entry = { V, Extension, -Scale };
+ VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
Dest.push_back(Entry);
}
}
}
+
+bool BasicAAResult::constantOffsetHeuristic(
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
+ uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
+ V2Size == MemoryLocation::UnknownSize)
+ return false;
+
+ const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+
+ if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
+ Var0.Scale != -Var1.Scale)
+ return false;
+
+ unsigned Width = Var1.V->getType()->getIntegerBitWidth();
+
+  // We'll strip off the Extensions of Var0 and Var1 and do another round
+  // of GetLinearExpression decomposition. For example, if Var0 is
+  // zext(%x + 1) we should get V0 == %x and V0Offset == 1.
+
+ APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
+ V1Offset(Width, 0);
+ bool NSW = true, NUW = true;
+ unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
+ const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
+ V0SExtBits, DL, 0, AC, DT, NSW, NUW);
+ NSW = true, NUW = true;
+ const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
+ V1SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+ if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
+ V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
+ return false;
+
+ // We have a hit - Var0 and Var1 only differ by a constant offset!
+
+  // If we've been sext'ed then zext'd, the maximum difference between Var0
+  // and Var1 can be computed, but we're only interested in the absolute
+  // minimum difference between the two. The minimum distance may occur due to
+ // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so
+ // the minimum distance between %i and %i + 5 is 3.
+ APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
+ MinDiff = APIntOps::umin(MinDiff, Wrapped);
+ uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+
+ // We can't definitely say whether GEP1 is before or after V2 due to wrapping
+  // arithmetic (i.e. for some values of GEP1 and V2, GEP1 < V2, and for other
+ // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
+ // V2Size can fit in the MinDiffBytes gap.
+ return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
+ V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+}
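
The wrapped minimum-distance computation above can be checked with plain integer arithmetic; this sketch uses ordinary integers in place of llvm::APInt:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// For a Width-bit index, the distance between %i and %i + Off is the
// smaller of Off and 2^Width - Off once wrapping is taken into account.
static uint64_t minWrappedDistance(uint64_t Off, unsigned Width) {
  uint64_t Mod = 1ULL << Width; // assumes Width < 64
  Off %= Mod;
  return std::min(Off, Mod - Off);
}

int main() {
  // "add i3 %i, 5": with 3-bit wrapping, %i == 7 gives (7 + 5) mod 8 == 4,
  // so the minimum distance between %i and %i + 5 is 3, not 5.
  std::printf("%llu\n", (unsigned long long)minWrappedDistance(5, 3)); // 3
  return 0;
}
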
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+char BasicAA::PassID;
+
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return BasicAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getCachedResult<DominatorTreeAnalysis>(F),
+ AM->getCachedResult<LoopAnalysis>(F));
+}
+
+BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char BasicAAWrapperPass::ID = 0;
+void BasicAAWrapperPass::anchor() {}
+
+INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+
+FunctionPass *llvm::createBasicAAWrapperPass() {
+ return new BasicAAWrapperPass();
+}
+
+bool BasicAAWrapperPass::runOnFunction(Function &F) {
+ auto &ACT = getAnalysis<AssumptionCacheTracker>();
+ auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+ ACT.getAssumptionCache(F),
+ DTWP ? &DTWP->getDomTree() : nullptr,
+ LIWP ? &LIWP->getLoopInfo() : nullptr));
+
+ return false;
+}
+
+void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
+ return BasicAAResult(
+ F.getParent()->getDataLayout(),
+ P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+}
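
A hypothetical legacy-PM client of the helper above might look as follows; this is a sketch only, and MyLegacyPass is an assumed FunctionPass whose getAnalysisUsage already requires TargetLibraryInfoWrapperPass and AssumptionCacheTracker:

bool MyLegacyPass::runOnFunction(Function &F) {
  // Build a self-contained BasicAAResult for F from this pass's analyses.
  BasicAAResult BAR = createLegacyPMBasicAAResult(*this, F);
  // ... issue alias queries against BAR here ...
  return false;
}
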
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 3d819eb..90b7a33 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> {
typedef Function::const_iterator nodes_iterator;
static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return &G->getFunction()->front();
}
static ChildIteratorType child_begin(const NodeType *N) {
return succ_begin(N);
@@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
} // end namespace llvm
#endif
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
- "Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
- "Block Frequency Analysis", true, true)
-
-char BlockFrequencyInfo::ID = 0;
-
+BlockFrequencyInfo::BlockFrequencyInfo() {}
-BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
- initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
-}
-
-BlockFrequencyInfo::~BlockFrequencyInfo() {}
-
-void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfo>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.setPreservesAll();
+BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
+ calculate(F, BPI, LI);
}
-bool BlockFrequencyInfo::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+void BlockFrequencyInfo::calculate(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
if (!BFI)
BFI.reset(new ImplType);
- BFI->doFunction(&F, &BPI, &LI);
+ BFI->calculate(F, BPI, LI);
#ifndef NDEBUG
if (ViewBlockFreqPropagationDAG != GVDT_None)
view();
#endif
- return false;
-}
-
-void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
-
-void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
- if (BFI) BFI->print(O);
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI ? BFI->getBlockFreq(BB) : 0;
}
+void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
+ assert(BFI && "Expected analysis to be available");
+ BFI->setBlockFreq(BB, Freq);
+}
+
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
void BlockFrequencyInfo::view() const {
@@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
uint64_t BlockFrequencyInfo::getEntryFreq() const {
return BFI ? BFI->getEntryFreq() : 0;
}
+
+void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
+
+void BlockFrequencyInfo::print(raw_ostream &OS) const {
+ if (BFI)
+ BFI->print(OS);
+}
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+
+char BlockFrequencyInfoWrapperPass::ID = 0;
+
+BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass()
+ : FunctionPass(ID) {
+ initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {}
+
+void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BFI.print(OS);
+}
+
+void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); }
+
+bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI.calculate(F, BPI, LI);
+ return false;
+}
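
With runOnFunction reduced to glue, both analyses can also be built by hand, outside any pass manager. A sketch, assuming the usual LLVM headers and an existing Function F and LoopInfo LI in scope (BranchProbabilityInfo::calculate is introduced later in this commit):

BranchProbabilityInfo BPI;
BPI.calculate(F, LI);               // fill in edge probabilities first
BlockFrequencyInfo BFI(F, BPI, LI); // the new three-argument constructor
uint64_t EntryFreq = BFI.getEntryFreq();
(void)EntryFreq;
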
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 6ceda06..48e23af 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
return Freqs[Node.Index].Scaled;
}
+void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
+ uint64_t Freq) {
+ assert(Node.isValid() && "Expected valid node");
+ assert(Node.Index < Freqs.size() && "Expected legal index");
+ Freqs[Node.Index].Integer = Freq;
+}
+
std::string
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
return std::string();
@@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
DEBUG(dbgs() << " - Add back edge mass for node "
<< getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
- Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ if (BackedgeMass.getMass() > 0)
+ Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ else
+ DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
}
DitheringDistributer D(Dist, LoopMass);
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 430b412..cf0cc8d 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -27,13 +27,13 @@ using namespace llvm;
#define DEBUG_TYPE "branch-prob"
-INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
-char BranchProbabilityInfo::ID = 0;
+char BranchProbabilityInfoWrapperPass::ID = 0;
// Weights are for internal use only. They are used by heuristics to help to
// estimate edges' probability. Example:
@@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-// Standard weight value. Used when none of the heuristics set weight for
-// the edge.
-static const uint32_t NORMAL_WEIGHT = 16;
-
-// Minimum weight of an edge. Please note, that weight is NEVER 0.
-static const uint32_t MIN_WEIGHT = 1;
-
/// \brief Calculate edge weights for successors that lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
@@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
return false;
- uint32_t UnreachableWeight =
- std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(),
- E = UnreachableEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, UnreachableWeight);
+ // If the terminator is an InvokeInst, check only the normal destination block
+  // as the unwind edge of InvokeInst is also very unlikely to be taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ if (PostDominatedByUnreachable.count(II->getNormalDest())) {
+ PostDominatedByUnreachable.insert(BB);
+      // Return false here so that edge weights for InvokeInst can be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
- if (ReachableEdges.empty())
+ if (ReachableEdges.empty()) {
+ BranchProbability Prob(1, UnreachableEdges.size());
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
return true;
- uint32_t ReachableWeight =
- std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
- NORMAL_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(),
- E = ReachableEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, ReachableWeight);
+ }
+
+ BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ UnreachableEdges.size());
+ BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ ReachableEdges.size());
+
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, UnreachableProb);
+ for (unsigned SuccIdx : ReachableEdges)
+ setEdgeProbability(BB, SuccIdx, ReachableProb);
return true;
}
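
A quick self-contained check that the probabilities chosen above always sum to one across all successors: U unreachable edges at T/((T+N)*U) plus R reachable edges at N/((T+N)*R) add up to exactly (T+N)/(T+N). The weight values below are placeholders for illustration, not LLVM's constants:

#include <cassert>

int main() {
  const double T = 1048575.0, N = 1.0; // assumed weights
  for (int U = 1; U <= 4; ++U)
    for (int R = 1; R <= 4; ++R) {
      double Sum = U * (T / ((T + N) * U)) + R * (N / ((T + N) * R));
      assert(Sum > 0.999999 && Sum < 1.000001);
    }
  return 0;
}
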
@@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
WeightSum = 0;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- uint32_t W = Weights[i] / ScalingFactor;
- WeightSum += W;
- setEdgeWeight(BB, i, W);
+ Weights[i] /= ScalingFactor;
+ WeightSum += Weights[i];
}
+
+ if (WeightSum == 0) {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {1, e});
+ } else {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
+ }
+
assert(WeightSum <= UINT32_MAX &&
"Expected weights to scale down to 32 bits");
@@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
- uint32_t ColdWeight =
- std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(),
- E = ColdEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, ColdWeight);
-
- if (NormalEdges.empty())
+ if (NormalEdges.empty()) {
+ BranchProbability Prob(1, ColdEdges.size());
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
return true;
- uint32_t NormalWeight = std::max(
- CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(),
- E = NormalEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, NormalWeight);
+ }
+
+ BranchProbability ColdProb(CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ ColdEdges.size());
+ BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ NormalEdges.size());
+
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, ColdProb);
+ for (unsigned SuccIdx : NormalEdges)
+ setEdgeProbability(BB, SuccIdx, NormalProb);
return true;
}
@@ -312,15 +328,18 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(PH_TAKEN_WEIGHT,
+ PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
- Loop *L = LI->getLoopFor(BB);
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB,
+ const LoopInfo &LI) {
+ Loop *L = LI.getLoopFor(BB);
if (!L)
return false;
@@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
if (BackEdges.empty() && ExitingEdges.empty())
return false;
- if (uint32_t numBackEdges = BackEdges.size()) {
- uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
- if (backWeight < NORMAL_WEIGHT)
- backWeight = NORMAL_WEIGHT;
+ // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
+ // normalize them so that they sum up to one.
+ SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero());
+ unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
+ if (!BackEdges.empty())
+ Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!InEdges.empty())
+ Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!ExitingEdges.empty())
+ Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);
- for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(),
- EE = BackEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, backWeight);
- }
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ auto Prob = Probs[0] / numBackEdges;
+ for (unsigned SuccIdx : BackEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numInEdges = InEdges.size()) {
- uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
- if (inWeight < NORMAL_WEIGHT)
- inWeight = NORMAL_WEIGHT;
-
- for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(),
- EE = InEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, inWeight);
- }
+ auto Prob = Probs[1] / numInEdges;
+ for (unsigned SuccIdx : InEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numExitingEdges = ExitingEdges.size()) {
- uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges;
- if (exitWeight < MIN_WEIGHT)
- exitWeight = MIN_WEIGHT;
-
- for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(),
- EE = ExitingEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, exitWeight);
- }
+ auto Prob = Probs[2] / numExitingEdges;
+ for (unsigned SuccIdx : ExitingEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
return true;
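
To make the normalization concrete: with back edges and exiting edges present but no in-edges, Denom is LBH_TAKEN_WEIGHT + LBH_NONTAKEN_WEIGHT; the back edges share the first term and the exiting edges the second, each group's share divided evenly among its edges. A sketch with placeholder weights (the real LBH_* constants are defined earlier in this file):

#include <cstdio>

int main() {
  const unsigned TakenW = 124, NonTakenW = 4;  // assumed values
  const unsigned NumBack = 2, NumExiting = 1;  // no in-edges in this example
  unsigned Denom = TakenW + NonTakenW;         // 128
  // Each back edge gets (124/128)/2 = 62/128; the exiting edge gets 4/128.
  std::printf("back edge: %u/%u, exiting edge: %u/%u\n",
              TakenW / NumBack, Denom, NonTakenW / NumExiting, Denom);
  return 0;
}
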
@@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT);
-
+ BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
+ ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
@@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT);
-
+ BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
+ FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
@@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
if (!II)
return false;
- setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT);
- setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(IH_TAKEN_WEIGHT,
+ IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb);
+ setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl());
return true;
}
-void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.setPreservesAll();
-}
-
-bool BranchProbabilityInfo::runOnFunction(Function &F) {
- DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
- << " ----\n\n");
- LastF = &F; // Store the last function we ran on for printing.
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- assert(PostDominatedByUnreachable.empty());
- assert(PostDominatedByColdCall.empty());
-
- // Walk the basic blocks in post-order so that we can build up state about
- // the successors of a block iteratively.
- for (auto BB : post_order(&F.getEntryBlock())) {
- DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
- if (calcUnreachableHeuristics(BB))
- continue;
- if (calcMetadataWeights(BB))
- continue;
- if (calcColdCallHeuristics(BB))
- continue;
- if (calcLoopBranchHeuristics(BB))
- continue;
- if (calcPointerHeuristics(BB))
- continue;
- if (calcZeroHeuristics(BB))
- continue;
- if (calcFloatingPointHeuristics(BB))
- continue;
- calcInvokeHeuristics(BB);
- }
-
- PostDominatedByUnreachable.clear();
- PostDominatedByColdCall.clear();
- return false;
-}
-
void BranchProbabilityInfo::releaseMemory() {
- Weights.clear();
+ Probs.clear();
}
-void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
+void BranchProbabilityInfo::print(raw_ostream &OS) const {
OS << "---- Branch Probabilities ----\n";
// We print the probabilities from the last function the analysis ran over,
// or the function it is currently running over.
assert(LastF && "Cannot print prior to running over a function");
- for (Function::const_iterator BI = LastF->begin(), BE = LastF->end();
- BI != BE; ++BI) {
- for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI);
- SI != SE; ++SI) {
- printEdgeProbability(OS << " ", BI, *SI);
+ for (const auto &BI : *LastF) {
+ for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE;
+ ++SI) {
+ printEdgeProbability(OS << " ", &BI, *SI);
}
}
}
-uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
- uint32_t Sum = 0;
-
- for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex());
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum >= PrevSum); (void) PrevSum;
- }
-
- return Sum;
-}
-
bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
}
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
- uint32_t Sum = 0;
- uint32_t MaxWeight = 0;
+ auto MaxProb = BranchProbability::getZero();
BasicBlock *MaxSucc = nullptr;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(BB, Succ);
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
-
- if (Weight > MaxWeight) {
- MaxWeight = Weight;
+ auto Prob = getEdgeProbability(BB, Succ);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
MaxSucc = Succ;
}
}
// Hot probability is at least 4/5 = 80%
- if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
+ if (MaxProb > BranchProbability(4, 5))
return MaxSucc;
return nullptr;
}
-/// Get the raw edge weight for the edge. If can't find it, return
-/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index
-/// to the successors.
-uint32_t BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
- DenseMap<Edge, uint32_t>::const_iterator I =
- Weights.find(std::make_pair(Src, IndexInSuccessors));
+/// Get the raw edge probability for the edge. If we can't find it, return a
+/// default probability 1/N where N is the number of successors. Here an edge
+/// is specified using PredBlock and an index to the successors.
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors) const {
+ auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
- if (I != Weights.end())
+ if (I != Probs.end())
return I->second;
- return DEFAULT_WEIGHT;
+ return {1,
+ static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};
}
-uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
- succ_const_iterator Dst) const {
- return getEdgeWeight(Src, Dst.getSuccessorIndex());
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ succ_const_iterator Dst) const {
+ return getEdgeProbability(Src, Dst.getSuccessorIndex());
}
-/// Get the raw edge weight calculated for the block pair. This returns the sum
-/// of all raw edge weights from Src to Dst.
-uint32_t BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
- uint32_t Weight = 0;
- bool FoundWeight = false;
- DenseMap<Edge, uint32_t>::const_iterator MapI;
+/// Get the raw edge probability calculated for the block pair. This returns the
+/// sum of all raw edge probabilities from Src to Dst.
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ const BasicBlock *Dst) const {
+ auto Prob = BranchProbability::getZero();
+ bool FoundProb = false;
for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
- MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
- if (MapI != Weights.end()) {
- FoundWeight = true;
- Weight += MapI->second;
+ auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
+ if (MapI != Probs.end()) {
+ FoundProb = true;
+ Prob += MapI->second;
}
}
- return (!FoundWeight) ? DEFAULT_WEIGHT : Weight;
+ uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
+ return FoundProb ? Prob : BranchProbability(1, succ_num);
}
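
Note that this block-pair overload sums over parallel edges: if several successor slots of Src target the same Dst (two switch cases to one block, say), each slot's recorded probability contributes. A tiny sketch with plain fractions in place of BranchProbability:

#include <cstdio>

int main() {
  unsigned SlotNum[2] = {1, 1}; // two slots to the same block, 1/4 each
  unsigned Num = 0, Den = 4;
  for (unsigned S : SlotNum)
    Num += S;
  std::printf("%u/%u\n", Num, Den); // prints 2/4
  return 0;
}
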
-/// Set the edge weight for a given edge specified by PredBlock and an index
-/// to the successors.
-void BranchProbabilityInfo::
-setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors,
- uint32_t Weight) {
- Weights[std::make_pair(Src, IndexInSuccessors)] = Weight;
- DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
- << IndexInSuccessors << " successor weight to "
- << Weight << "\n");
-}
-
-/// Get an edge's probability, relative to other out-edges from Src.
-BranchProbability BranchProbabilityInfo::
-getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const {
- uint32_t N = getEdgeWeight(Src, IndexInSuccessors);
- uint32_t D = getSumForBlock(Src);
-
- return BranchProbability(N, D);
-}
-
-/// Get the probability of going from Src to Dst. It returns the sum of all
-/// probabilities for edges from Src to Dst.
-BranchProbability BranchProbabilityInfo::
-getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
-
- uint32_t N = getEdgeWeight(Src, Dst);
- uint32_t D = getSumForBlock(Src);
-
- return BranchProbability(N, D);
+/// Set the edge probability for a given edge specified by PredBlock and an
+/// index to the successors.
+void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors,
+ BranchProbability Prob) {
+ Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors
+ << " successor probability to " << Prob << "\n");
}
raw_ostream &
@@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
return OS;
}
+
+void BranchProbabilityInfo::calculate(Function &F, const LoopInfo &LI) {
+ DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
+ << " ----\n\n");
+ LastF = &F; // Store the last function we ran on for printing.
+ assert(PostDominatedByUnreachable.empty());
+ assert(PostDominatedByColdCall.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
+ continue;
+ if (calcMetadataWeights(BB))
+ continue;
+ if (calcColdCallHeuristics(BB))
+ continue;
+ if (calcLoopBranchHeuristics(BB, LI))
+ continue;
+ if (calcPointerHeuristics(BB))
+ continue;
+ if (calcZeroHeuristics(BB))
+ continue;
+ if (calcFloatingPointHeuristics(BB))
+ continue;
+ calcInvokeHeuristics(BB);
+ }
+
+ PostDominatedByUnreachable.clear();
+ PostDominatedByColdCall.clear();
+}
+
+void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
+ const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.calculate(F, LI);
+ return false;
+}
+
+void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
+
+void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BPI.print(OS);
+}
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
index e15109b..0dfd57d 100644
--- a/contrib/llvm/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F,
/// and return its position in the terminator instruction's list of
/// successors. It is an error to call this with a block that is not a
/// successor.
-unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
- TerminatorInst *Term = BB->getTerminator();
+unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
+ const BasicBlock *Succ) {
+ const TerminatorInst *Term = BB->getTerminator();
#ifndef NDEBUG
unsigned e = Term->getNumSuccessors();
#endif
@@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
return true;
// Linear scan, start at 'A', see whether we hit 'B' or the end first.
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E;
+ ++I) {
if (&*I == B)
return true;
}
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
index fe1c088..4843ed6 100644
--- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -27,18 +27,17 @@
// time.
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "StratifiedSets.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -47,7 +46,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <forward_list>
#include <memory>
#include <tuple>
@@ -55,6 +53,19 @@ using namespace llvm;
#define DEBUG_TYPE "cfl-aa"
+CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {}
+CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+// \brief Information we have about a function and would like to keep around
+struct CFLAAResult::FunctionInfo {
+ StratifiedSets<Value *> Sets;
+ // Lots of functions have < 4 returns. Adjust as necessary.
+ SmallVector<Value *, 4> ReturnedValues;
+
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+};
+
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
@@ -141,129 +152,13 @@ struct Edge {
: From(From), To(To), Weight(W), AdditionalAttrs(A) {}
};
-// \brief Information we have about a function and would like to keep around
-struct FunctionInfo {
- StratifiedSets<Value *> Sets;
- // Lots of functions have < 4 returns. Adjust as necessary.
- SmallVector<Value *, 4> ReturnedValues;
-
- FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
- : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
-};
-
-struct CFLAliasAnalysis;
-
-struct FunctionHandle : public CallbackVH {
- FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA)
- : CallbackVH(Fn), CFLAA(CFLAA) {
- assert(Fn != nullptr);
- assert(CFLAA != nullptr);
- }
-
- ~FunctionHandle() override {}
-
- void deleted() override { removeSelfFromCache(); }
- void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
-
-private:
- CFLAliasAnalysis *CFLAA;
-
- void removeSelfFromCache();
-};
-
-struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis {
-private:
- /// \brief Cached mapping of Functions to their StratifiedSets.
- /// If a function's sets are currently being built, it is marked
- /// in the cache as an Optional without a value. This way, if we
- /// have any kind of recursion, it is discernable from a function
- /// that simply has empty sets.
- DenseMap<Function *, Optional<FunctionInfo>> Cache;
- std::forward_list<FunctionHandle> Handles;
-
-public:
- static char ID;
-
- CFLAliasAnalysis() : ImmutablePass(ID) {
- initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- ~CFLAliasAnalysis() override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- }
-
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis *)this;
- return this;
- }
-
- /// \brief Inserts the given Function into the cache.
- void scan(Function *Fn);
-
- void evict(Function *Fn) { Cache.erase(Fn); }
-
- /// \brief Ensures that the given function is available in the cache.
- /// Returns the appropriate entry from the cache.
- const Optional<FunctionInfo> &ensureCached(Function *Fn) {
- auto Iter = Cache.find(Fn);
- if (Iter == Cache.end()) {
- scan(Fn);
- Iter = Cache.find(Fn);
- assert(Iter != Cache.end());
- assert(Iter->second.hasValue());
- }
- return Iter->second;
- }
-
- AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- if (LocA.Ptr == LocB.Ptr) {
- if (LocA.Size == LocB.Size) {
- return MustAlias;
- } else {
- return PartialAlias;
- }
- }
-
- // Comparisons between global variables and other constants should be
- // handled by BasicAA.
- // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
- // a GlobalValue and ConstantExpr, but every query needs to have at least
- // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
- // are.
- if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
- return AliasAnalysis::alias(LocA, LocB);
- }
-
- AliasResult QueryResult = query(LocA, LocB);
- if (QueryResult == MayAlias)
- return AliasAnalysis::alias(LocA, LocB);
-
- return QueryResult;
- }
-
- bool doInitialization(Module &M) override;
-};
-
-void FunctionHandle::removeSelfFromCache() {
- assert(CFLAA != nullptr);
- auto *Val = getValPtr();
- CFLAA->evict(cast<Function>(Val));
- setValPtr(nullptr);
-}
-
// \brief Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
- CFLAliasAnalysis &AA;
+ CFLAAResult &AA;
SmallVectorImpl<Edge> &Output;
public:
- GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output)
+ GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output)
: AA(AA), Output(Output) {}
void visitInstruction(Instruction &) {
@@ -480,6 +375,8 @@ public:
}
template <typename InstT> void visitCallLikeInst(InstT &Inst) {
+ // TODO: Add support for noalias args/all the other fun function attributes
+ // that we can tack on.
SmallVector<Function *, 4> Targets;
if (getPossibleTargets(&Inst, Targets)) {
if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
@@ -488,8 +385,16 @@ public:
Output.clear();
}
+ // Because the function is opaque, we need to note that anything
+ // could have happened to the arguments, and that the result could alias
+ // just about anything, too.
+ // The goal of the loop is in part to unify many Values into one set, so we
+ // don't care if the function is void there.
for (Value *V : Inst.arg_operands())
Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll));
+ if (Inst.getNumArgOperands() == 0 &&
+ Inst.getType() != Type::getVoidTy(Inst.getContext()))
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
}
void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
@@ -624,7 +529,7 @@ public:
// ----- Various Edge iterators for the graph ----- //
// \brief Iterator for edges. Because this graph is bidirected, we don't
- // allow modificaiton of the edges using this iterator. Additionally, the
+ // allow modification of the edges using this iterator. Additionally, the
// iterator becomes invalid if you add edges to or from the node you're
// getting the edges of.
struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
@@ -727,16 +632,6 @@ typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
typedef DenseMap<Value *, GraphT::Node> NodeMapT;
}
-// -- Setting up/registering CFLAA pass -- //
-char CFLAliasAnalysis::ID = 0;
-
-INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa",
- "CFL-Based AA implementation", false, true, false)
-
-ImmutablePass *llvm::createCFLAliasAnalysisPass() {
- return new CFLAliasAnalysis();
-}
-
//===----------------------------------------------------------------------===//
// Function declarations that require types defined in the namespace above
//===----------------------------------------------------------------------===//
@@ -751,12 +646,10 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
static EdgeType flipWeight(EdgeType);
// Gets edges of the given Instruction*, writing them to the SmallVector*.
-static void argsToEdges(CFLAliasAnalysis &, Instruction *,
- SmallVectorImpl<Edge> &);
+static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &);
// Gets edges of the given ConstantExpr*, writing them to the SmallVector*.
-static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *,
- SmallVectorImpl<Edge> &);
+static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &);
// Gets the "Level" that one should travel in StratifiedSets
// given an EdgeType.
@@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType);
// Builds the graph needed for constructing the StratifiedSets for the
// given function
-static void buildGraphFrom(CFLAliasAnalysis &, Function *,
+static void buildGraphFrom(CFLAAResult &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
// Gets the edges of a ConstantExpr as if it was an Instruction. This
// function also acts on any nested ConstantExprs, adding the edges
// of those to the given SmallVector as well.
-static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+static void constexprToEdges(CFLAAResult &, ConstantExpr &,
SmallVectorImpl<Edge> &);
// Given an Instruction, this will add it to the graph, along with any
@@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
// addInstructionToGraph would add both the `load` and `getelementptr`
// instructions to the graph appropriately.
-static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+static void addInstructionToGraph(CFLAAResult &, Instruction &,
SmallVectorImpl<Value *> &, NodeMapT &,
GraphT &);
// Notes whether it would be pointless to add the given Value to our sets.
static bool canSkipAddingToSets(Value *Val);
-// Builds the graph + StratifiedSets for a function.
-static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
-
static Optional<Function *> parentFunctionOfValue(Value *Val) {
if (auto *Inst = dyn_cast<Instruction>(Val)) {
auto *Bb = Inst->getParent();
@@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) {
}
static bool hasUsefulEdges(ConstantExpr *CE) {
- // ConstantExpr doens't have terminators, invokes, or fences, so only needs
+ // ConstantExpr doesn't have terminators, invokes, or fences, so only needs
// to check for compares.
return CE->getOpcode() != Instruction::ICmp &&
CE->getOpcode() != Instruction::FCmp;
@@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) {
llvm_unreachable("Incomplete coverage of EdgeType enum");
}
-static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
+static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
assert(hasUsefulEdges(Inst) &&
"Expected instructions to have 'useful' edges");
@@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
v.visit(Inst);
}
-static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE,
+static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE,
SmallVectorImpl<Edge> &Output) {
assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges");
GetEdgesVisitor v(Analysis, Output);
@@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) {
llvm_unreachable("Incomplete switch coverage");
}
-static void constexprToEdges(CFLAliasAnalysis &Analysis,
+static void constexprToEdges(CFLAAResult &Analysis,
ConstantExpr &CExprToCollapse,
SmallVectorImpl<Edge> &Results) {
SmallVector<ConstantExpr *, 4> Worklist;
@@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis,
}
}
-static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst,
SmallVectorImpl<Value *> &ReturnedValues,
NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
@@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
// buy us much that we don't already have. I'd like to add interprocedural
// analysis prior to this however, in case that somehow requires the graph
// produced by this for efficient execution
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn,
SmallVectorImpl<Value *> &ReturnedValues,
NodeMapT &Map, GraphT &Graph) {
for (auto &Bb : Fn->getBasicBlockList())
@@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) {
return false;
}
-static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
+// Builds the graph + StratifiedSets for a function.
+CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) {
NodeMapT Map;
GraphT Graph;
SmallVector<Value *, 4> ReturnedValues;
- buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph);
+ buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph);
DenseMap<GraphT::Node, Value *> NodeValueMap;
NodeValueMap.resize(Map.size());
@@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
return FunctionInfo(Builder.build(), std::move(ReturnedValues));
}
-void CFLAliasAnalysis::scan(Function *Fn) {
+void CFLAAResult::scan(Function *Fn) {
auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
(void)InsertPair;
assert(InsertPair.second &&
"Trying to scan a function that has already been cached");
- FunctionInfo Info(buildSetsFrom(*this, Fn));
+ FunctionInfo Info(buildSetsFrom(Fn));
Cache[Fn] = std::move(Info);
Handles.push_front(FunctionHandle(Fn, this));
}
-AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); }
+
+/// \brief Ensures that the given function is available in the cache.
+/// Returns the appropriate entry from the cache.
+const Optional<CFLAAResult::FunctionInfo> &
+CFLAAResult::ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+}
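
ensureCached relies on scan's use of an engaged-but-empty cache slot as an in-progress marker: an entry that is present but has no value means the function's sets are still being built, so recursion is distinguishable from a finished function with genuinely empty sets. The idiom in isolation, sketched with std::optional and an int key in place of llvm::Optional and Function*:

#include <cassert>
#include <map>
#include <optional>

struct SketchInfo { int Data = 0; };
static std::map<int, std::optional<SketchInfo>> SketchCache;

void scanSketch(int Fn) {
  // Mark Fn as in-progress before building anything; a recursive query
  // will see an entry without a value rather than re-entering the build.
  auto Ins = SketchCache.insert({Fn, std::nullopt});
  assert(Ins.second && "already scanned");
  (void)Ins;
  SketchInfo Info; // ... building may consult SketchCache recursively ...
  SketchCache[Fn] = Info;
}
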
+
+AliasResult CFLAAResult::query(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
auto *ValA = const_cast<Value *>(LocA.Ptr);
auto *ValB = const_cast<Value *>(LocB.Ptr);
@@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
return NoAlias;
}
-bool CFLAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char CFLAA::PassID;
+
+char CFLAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); }
+
+CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) {
+ initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool CFLAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(
+ new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool CFLAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index e2799d9..7cec962 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
CallGraph::CallGraph(Module &M)
: M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
- CallsExternalNode(new CallGraphNode(nullptr)) {
+ CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
@@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M)
Root = ExternalCallingNode;
}
+CallGraph::CallGraph(CallGraph &&Arg)
+ : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root),
+ ExternalCallingNode(Arg.ExternalCallingNode),
+ CallsExternalNode(std::move(Arg.CallsExternalNode)) {
+ Arg.FunctionMap.clear();
+ Arg.Root = nullptr;
+ Arg.ExternalCallingNode = nullptr;
+}
+
CallGraph::~CallGraph() {
// CallsExternalNode is not in the function map, delete it explicitly.
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
+ if (CallsExternalNode)
+ CallsExternalNode->allReferencesDropped();
// Reset all node's use counts to zero before deleting them to prevent an
// assertion from firing.
@@ -43,8 +52,6 @@ CallGraph::~CallGraph() {
for (auto &I : FunctionMap)
I.second->allReferencesDropped();
#endif
- for (auto &I : FunctionMap)
- delete I.second;
}
void CallGraph::addToCallGraph(Function *F) {
@@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) {
// If this function is not defined in this translation unit, it could call
// anything.
if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
+ Node->addCalledFunction(CallSite(), CallsExternalNode.get());
// Look for calls by this function.
for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
@@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CallsExternalNode);
+ Node->addCalledFunction(CS, CallsExternalNode.get());
else if (!Callee->isIntrinsic())
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
@@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const {
Nodes.reserve(FunctionMap.size());
for (auto I = begin(), E = end(); I != E; ++I)
- Nodes.push_back(I->second);
+ Nodes.push_back(I->second.get());
std::sort(Nodes.begin(), Nodes.end(),
[](CallGraphNode *LHS, CallGraphNode *RHS) {
@@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const {
CN->print(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraph::dump() const { print(dbgs()); }
-#endif
// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
@@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
assert(CGN->empty() && "Cannot remove function from call "
"graph if it references other functions!");
Function *F = CGN->getFunction(); // Get the function for the call graph node
- delete CGN; // Delete the call graph node for this func
FunctionMap.erase(F); // Remove the call graph node from the map
M.getFunctionList().remove(F);
@@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
"Pointing CallGraphNode at a function that already exists");
FunctionMapTy::iterator I = FunctionMap.find(From);
I->second->F = const_cast<Function*>(To);
- FunctionMap[To] = I->second;
+ FunctionMap[To] = std::move(I->second);
FunctionMap.erase(I);
}
@@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
- CallGraphNode *&CGN = FunctionMap[F];
+ auto &CGN = FunctionMap[F];
if (CGN)
- return CGN;
+ return CGN.get();
assert((!F || F->getParent() == &M) && "Function not in current module!");
- return CGN = new CallGraphNode(const_cast<Function*>(F));
+ CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F));
+ return CGN.get();
}
//===----------------------------------------------------------------------===//
@@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << '\n';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraphNode::dump() const { print(dbgs()); }
-#endif
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
G->print(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
-#endif
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 07b389a..07b389a 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp
index 68dcd3c..68dcd3c 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 52ef807..1add2fa 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -52,63 +53,6 @@ namespace {
bool Captured;
};
- struct NumberedInstCache {
- SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts;
- BasicBlock::const_iterator LastInstFound;
- unsigned LastInstPos;
- const BasicBlock *BB;
-
- NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) {
- LastInstFound = BB->end();
- }
-
- /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out
- /// instruction while walking 'BB'.
- const Instruction *find(const Instruction *A, const Instruction *B) {
- const Instruction *Inst = nullptr;
- assert(!(LastInstFound == BB->end() && LastInstPos != 0) &&
- "Instruction supposed to be in NumberedInsts");
-
- // Start the search with the instruction found in the last lookup round.
- auto II = BB->begin();
- auto IE = BB->end();
- if (LastInstFound != IE)
- II = std::next(LastInstFound);
-
- // Number all instructions up to the point where we find 'A' or 'B'.
- for (++LastInstPos; II != IE; ++II, ++LastInstPos) {
- Inst = cast<Instruction>(II);
- NumberedInsts[Inst] = LastInstPos;
- if (Inst == A || Inst == B)
- break;
- }
-
- assert(II != IE && "Instruction not found?");
- LastInstFound = II;
- return Inst;
- }
-
- /// \brief Find out whether 'A' dominates 'B', meaning whether 'A'
- /// comes before 'B' in 'BB'. This is a simplification that considers
- /// cached instruction positions and ignores other basic blocks, being
- /// only relevant to compare relative instructions positions inside 'BB'.
- bool dominates(const Instruction *A, const Instruction *B) {
- assert(A->getParent() == B->getParent() &&
- "Instructions must be in the same basic block!");
-
- unsigned NA = NumberedInsts.lookup(A);
- unsigned NB = NumberedInsts.lookup(B);
- if (NA && NB)
- return NA < NB;
- if (NA)
- return true;
- if (NB)
- return false;
-
- return A == find(A, B);
- }
- };
-
/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only support the case where the Value is defined in the same basic block
@@ -116,8 +60,8 @@ namespace {
struct CapturesBefore : public CaptureTracker {
CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
- bool IncludeI)
- : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT),
+ bool IncludeI, OrderedBasicBlock *IC)
+ : OrderedBB(IC), BeforeHere(I), DT(DT),
ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
void tooManyUses() override { Captured = true; }
@@ -131,18 +75,18 @@ namespace {
// Compute the case where both instructions are inside the same basic
// block. Since instructions in the same BB as BeforeHere are numbered in
- // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable'
+ // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable'
// which are very expensive for large basic blocks.
if (BB == BeforeHere->getParent()) {
// 'I' dominates 'BeforeHere' => not safe to prune.
//
- // The value defined by an invoke dominates an instruction only if it
- // dominates every instruction in UseBB. A PHI is dominated only if
- // the instruction dominates every possible use in the UseBB. Since
+ // The value defined by an invoke dominates an instruction only
+ // if it dominates every instruction in UseBB. A PHI is dominated only
+ // if the instruction dominates every possible use in the UseBB. Since
// UseBB == BB, avoid pruning.
if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)
return false;
- if (!LocalInstCache.dominates(BeforeHere, I))
+ if (!OrderedBB->dominates(BeforeHere, I))
return false;
// 'BeforeHere' comes before 'I', it's safe to prune if we also
@@ -157,10 +101,7 @@ namespace {
SmallVector<BasicBlock*, 32> Worklist;
Worklist.append(succ_begin(BB), succ_end(BB));
- if (!isPotentiallyReachableFromMany(Worklist, BB, DT))
- return true;
-
- return false;
+ return !isPotentiallyReachableFromMany(Worklist, BB, DT);
}
// If the value is defined in the same basic block as use and BeforeHere,
@@ -196,7 +137,7 @@ namespace {
return true;
}
- NumberedInstCache LocalInstCache;
+ OrderedBasicBlock *OrderedBB;
const Instruction *BeforeHere;
DominatorTree *DT;
@@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V,
/// returning the value (or part of it) from the function counts as capturing
/// it or not. The boolean StoreCaptures specifies whether storing the value
/// (or part of it) into memory anywhere automatically counts as capturing it
-/// or not.
+/// or not. An ordered basic block \p OBB can be used to speed up
+/// queries about relative order among instructions in the same basic block.
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
- DominatorTree *DT, bool IncludeI) {
+ DominatorTree *DT, bool IncludeI,
+ OrderedBasicBlock *OBB) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
+ bool UseNewOBB = OBB == nullptr;
if (!DT)
return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ if (UseNewOBB)
+ OBB = new OrderedBasicBlock(I->getParent());
// TODO: See comment in PointerMayBeCaptured regarding what could be done
// with StoreCaptures.
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
PointerMayBeCaptured(V, &CB);
+
+ if (UseNewOBB)
+ delete OBB;
return CB.Captured;
}
@@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (CallSite::arg_iterator A = B; A != E; ++A)
+ CallSite::data_operand_iterator B =
+ CS.data_operands_begin(), E = CS.data_operands_end();
+ for (CallSite::data_operand_iterator A = B; A != E; ++A)
if (A->get() == V && !CS.doesNotCapture(A - B))
// The parameter is not marked 'nocapture' - captured.
if (Tracker->captured(U))
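
The CaptureTracking hunks above replace the local NumberedInstCache with the shared OrderedBasicBlock utility, but the underlying trick is unchanged: number a block's instructions lazily so repeated "does A come before B?" queries stay cheap even in huge blocks. A standalone sketch of that trick, with a simplified instruction type standing in for llvm::Instruction:

#include <cassert>
#include <unordered_map>
#include <vector>

struct Inst {};

class OrderedBlock {
  const std::vector<const Inst *> &Insts;         // the block, in order
  std::unordered_map<const Inst *, unsigned> Pos; // lazily assigned numbers
  size_t NextToNumber = 0;                        // first unnumbered index

public:
  explicit OrderedBlock(const std::vector<const Inst *> &I) : Insts(I) {}

  // Intra-block "dominates": walk forward, numbering instructions until A
  // or B is found; whichever appears first comes before the other.
  bool comesBefore(const Inst *A, const Inst *B) {
    auto IA = Pos.find(A), IB = Pos.find(B);
    if (IA != Pos.end() && IB != Pos.end())
      return IA->second < IB->second;
    if (IA != Pos.end())
      return true;   // A is numbered, B lies beyond the numbered prefix
    if (IB != Pos.end())
      return false;
    while (NextToNumber < Insts.size()) {
      const Inst *Cur = Insts[NextToNumber];
      Pos[Cur] = static_cast<unsigned>(NextToNumber);
      ++NextToNumber;
      if (Cur == A)
        return true;
      if (Cur == B)
        return false;
    }
    assert(false && "A and B must both be in the block");
    return false;
  }
};
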
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index 46a2c43..4090b4c 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
continue;
// If all uses of this value are ephemeral, then so is this value.
- bool FoundNEUse = false;
- for (const User *I : V->users())
- if (!EphValues.count(I)) {
- FoundNEUse = true;
- break;
- }
-
- if (FoundNEUse)
+ if (!std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); }))
continue;
EphValues.insert(V);
@@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
// Skip ephemeral values.
- if (EphValues.count(II))
+ if (EphValues.count(&*II))
continue;
// Special handling for calls.
@@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
+ if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB))
+ notDuplicatable = true;
+
if (const CallInst *CI = dyn_cast<CallInst>(II))
if (CI->cannotDuplicate())
notDuplicatable = true;
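
Two CodeMetrics changes above are worth a note: the ephemeral-value scan collapses a hand-rolled flag loop into std::all_of, and token-typed values used outside their block now mark the code as not duplicatable. A tiny standalone sketch of the all_of refactor, with ints standing in for llvm::User pointers:

#include <algorithm>
#include <set>
#include <vector>

// True when every user is already known ephemeral - the same predicate the
// removed FoundNEUse flag loop computed by hand.
bool allUsersEphemeral(const std::vector<int> &Users,
                       const std::set<int> &EphValues) {
  return std::all_of(Users.begin(), Users.end(),
                     [&](int U) { return EphValues.count(U) != 0; });
}
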
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 02a5aef..ccb5663 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
- CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::AddrSpaceCast)
+ CE->getOpcode() == Instruction::BitCast)
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
@@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
if (GV->isConstant() && GV->hasDefinitiveInitializer())
return GV->getInitializer();
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (GA->getAliasee() && !GA->mayBeOverridden())
+ return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL);
+
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE)
@@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
@@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
// return true for a name like "cos\0blah" which strcmp would return equal to
// "cos", but has length 8.
switch (Name[0]) {
- default: return false;
+ default:
+ return false;
case 'a':
- return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2";
+ return Name == "acos" || Name == "asin" || Name == "atan" ||
+ Name == "atan2" || Name == "acosf" || Name == "asinf" ||
+ Name == "atanf" || Name == "atan2f";
case 'c':
- return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ return Name == "ceil" || Name == "cos" || Name == "cosh" ||
+ Name == "ceilf" || Name == "cosf" || Name == "coshf";
case 'e':
- return Name == "exp" || Name == "exp2";
+ return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";
case 'f':
- return Name == "fabs" || Name == "fmod" || Name == "floor";
+ return Name == "fabs" || Name == "floor" || Name == "fmod" ||
+ Name == "fabsf" || Name == "floorf" || Name == "fmodf";
case 'l':
- return Name == "log" || Name == "log10";
+ return Name == "log" || Name == "log10" || Name == "logf" ||
+ Name == "log10f";
case 'p':
- return Name == "pow";
+ return Name == "pow" || Name == "powf";
case 's':
return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
- Name == "sinf" || Name == "sqrtf";
+ Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
case 't':
- return Name == "tan" || Name == "tanh";
+ return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
}
}
@@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFP::get(Ty->getContext(), V);
}
+ if (IntrinsicID == Intrinsic::floor) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::ceil) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::trunc) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::rint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::nearbyint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFoldFP(exp, V, Ty);
case Intrinsic::exp2:
return ConstantFoldFP(exp2, V, Ty);
- case Intrinsic::floor:
- return ConstantFoldFP(floor, V, Ty);
- case Intrinsic::ceil:
- return ConstantFoldFP(ceil, V, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
@@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
switch (Name[0]) {
case 'a':
- if (Name == "acos" && TLI->has(LibFunc::acos))
+ if ((Name == "acos" && TLI->has(LibFunc::acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc::acosf)))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin" && TLI->has(LibFunc::asin))
+ else if ((Name == "asin" && TLI->has(LibFunc::asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc::asinf)))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan" && TLI->has(LibFunc::atan))
+ else if ((Name == "atan" && TLI->has(LibFunc::atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc::atanf)))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil" && TLI->has(LibFunc::ceil))
+ if ((Name == "ceil" && TLI->has(LibFunc::ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc::ceilf)))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos" && TLI->has(LibFunc::cos))
+ else if ((Name == "cos" && TLI->has(LibFunc::cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc::cosf)))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh" && TLI->has(LibFunc::cosh))
+ else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc::coshf)))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf" && TLI->has(LibFunc::cosf))
- return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp" && TLI->has(LibFunc::exp))
+ if ((Name == "exp" && TLI->has(LibFunc::exp)) ||
+ (Name == "expf" && TLI->has(LibFunc::expf)))
return ConstantFoldFP(exp, V, Ty);
-
- if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
+ if ((Name == "exp2" && TLI->has(LibFunc::exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc::exp2f)))
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
- }
break;
case 'f':
- if (Name == "fabs" && TLI->has(LibFunc::fabs))
+ if ((Name == "fabs" && TLI->has(LibFunc::fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc::fabsf)))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor" && TLI->has(LibFunc::floor))
+ else if ((Name == "floor" && TLI->has(LibFunc::floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc::floorf)))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))
return ConstantFoldFP(log10, V, Ty);
else if (IntrinsicID == Intrinsic::sqrt &&
(Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
}
break;
case 's':
- if (Name == "sin" && TLI->has(LibFunc::sin))
+ if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc::sinf)))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh" && TLI->has(LibFunc::sinh))
+ else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc::sinhf)))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf" && TLI->has(LibFunc::sinf))
- return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan" && TLI->has(LibFunc::tan))
+ if ((Name == "tan" && TLI->has(LibFunc::tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc::tanf)))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh" && TLI->has(LibFunc::tanh))
+ else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc::tanhf)))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
if (!TLI)
return nullptr;
- if (Name == "pow" && TLI->has(LibFunc::pow))
+ if ((Name == "pow" && TLI->has(LibFunc::pow)) ||
+ (Name == "powf" && TLI->has(LibFunc::powf)))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod" && TLI->has(LibFunc::fmod))
+ if ((Name == "fmod" && TLI->has(LibFunc::fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc::fmodf)))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2" && TLI->has(LibFunc::atan2))
+ if ((Name == "atan2" && TLI->has(LibFunc::atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc::atan2f)))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
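
The rounding folds added to ConstantFolding above pick one APFloat::roundToIntegral mode per intrinsic: floor rounds toward negative infinity, ceil toward positive infinity, trunc toward zero, and rint/nearbyint round to nearest, ties to even (which assumes the default FP environment). A standalone check of those choices on the host, using the <cmath> counterparts rather than APFloat:

#include <cmath>
#include <cstdio>

int main() {
  double V = -2.5;
  std::printf("floor(%g) = %g\n", V, std::floor(V));   // toward -inf: -3
  std::printf("ceil(%g)  = %g\n", V, std::ceil(V));    // toward +inf: -2
  std::printf("trunc(%g) = %g\n", V, std::trunc(V));   // toward zero: -2
  // Under the default round-to-nearest-ties-to-even mode, -2.5 rounds to
  // the even neighbor, -2, matching the rmNearestTiesToEven fold above.
  std::printf("nearbyint(%g) = %g\n", V, std::nearbyint(V));
  return 0;
}
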
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index b529c1a..0383cbf 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
Mask[i] = val;
SmallVector<int, 16> ActualMask = SI->getShuffleMask();
- if (Mask != ActualMask)
- return false;
-
- return true;
+ return Mask == ActualMask;
}
static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
@@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
return -1;
switch (I->getOpcode()) {
- case Instruction::GetElementPtr:{
- Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
- return TTI->getAddressComputationCost(ValTy);
- }
+ case Instruction::GetElementPtr:
+ return TTI->getUserCost(I);
case Instruction::Ret:
case Instruction::PHI:
@@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Type*, 4> Tys;
+ SmallVector<Value *, 4> Args;
for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Tys.push_back(II->getArgOperand(J)->getType());
+ Args.push_back(II->getArgOperand(J));
return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
- Tys);
+ Args);
}
return -1;
default:
@@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
- Instruction *Inst = it;
+ Instruction *Inst = &*it;
unsigned Cost = getInstructionCost(Inst);
if (Cost != (unsigned)-1)
OS << "Cost Model: Found an estimated cost of " << Cost;
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
index 9d15786..baee8b3 100644
--- a/contrib/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -60,12 +60,12 @@ public:
void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
bool Delinearization::runOnFunction(Function &F) {
this->F = &F;
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
if (!BasePointer)
break;
AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
-
- // Do not try to delinearize memory accesses that are not AddRecs.
- if (!AR)
- break;
-
O << "\n";
O << "Inst:" << *Inst << "\n";
O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
- O << "AddRec: " << *AR << "\n";
+ O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
+ SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
if (Subscripts.size() == 0 || Sizes.size() == 0 ||
Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
new file mode 100644
index 0000000..912c5ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -0,0 +1,392 @@
+//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow. For example in this sequence:
+//
+// %1 = add i32 %x, %y
+// %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "demanded-bits"
+
+char DemandedBits::ID = 0;
+INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+
+DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) {
+ initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
+}
+
+void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesAll();
+}
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ I->isEHPad() || I->mayHaveSideEffects();
+}
+
+void DemandedBits::determineLiveOperandBits(
+ const Instruction *UserI, const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ case Instruction::ICmp:
+ // Count the number of leading zeroes in each operand.
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
+ break;
+ }
+}
+
+bool DemandedBits::runOnFunction(Function& Fn) {
+ F = &Fn;
+ Analyzed = false;
+ return false;
+}
+
+void DemandedBits::performAnalysis() {
+ if (Analyzed)
+ // Analysis already completed for this function.
+ return;
+ Analyzed = true;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ Visited.clear();
+ AliveBits.clear();
+
+ SmallVector<Instruction*, 128> Worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : instructions(*F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+ // For integer-valued instructions, set up an initial empty set of alive
+ // bits and add the instruction to the work list. For other instructions
+ // add their operands to the work list (for integer values operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (!AliveBits.count(&I)) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ }
+
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ // Compute the set of alive bits for each operand. These are anded into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntegerTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ AB = APInt(BitWidth, 0);
+ } else {
+ // Bits of each operand that are used to compute alive bits of the
+ // output are alive; all others are dead.
+ determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+ KnownZero, KnownOne,
+ KnownZero2, KnownOne2);
+ }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+}
+
+APInt DemandedBits::getDemandedBits(Instruction *I) {
+ performAnalysis();
+
+ const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
+ if (AliveBits.count(I))
+ return AliveBits[I];
+ return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+}
+
+bool DemandedBits::isInstructionDead(Instruction *I) {
+ performAnalysis();
+
+ return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() &&
+ !isAlwaysLive(I);
+}
+
+void DemandedBits::print(raw_ostream &OS, const Module *M) const {
+ // This is gross. But the alternative is making all the state mutable
+ // just because of this one debugging method.
+ const_cast<DemandedBits*>(this)->performAnalysis();
+ for (auto &KV : AliveBits) {
+ OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
+ << *KV.first << "\n";
+ }
+}
+
+FunctionPass *llvm::createDemandedBitsPass() {
+ return new DemandedBits();
+}
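
The new DemandedBits pass above propagates a demanded-bits mask from each user back to its operands; the add/trunc pair in the file header is the canonical case. A standalone sketch of the two transfer functions involved there, on plain 32-bit masks instead of APInt (the __builtin_clz call assumes a GCC/Clang host):

#include <cstdint>
#include <cstdio>

// trunc i32 -> i16: the operand's demanded bits are the result's demanded
// bits zero-extended, i.e. only the low 16 bits can matter.
uint32_t demandedThroughTrunc16(uint32_t AOut) { return AOut & 0xFFFFu; }

// add: carries ripple left only, so an operand needs no bits above the
// highest demanded output bit (getLowBitsSet(AOut.getActiveBits()) above).
uint32_t demandedThroughAdd(uint32_t AOut) {
  if (AOut == 0)
    return 0;
  unsigned Hi = 31 - __builtin_clz(AOut); // highest live output bit
  return Hi == 31 ? 0xFFFFFFFFu : ((1u << (Hi + 1)) - 1);
}

int main() {
  // %1 = add i32 %x, %y ; %2 = trunc i32 %1 to i16  (the header's example)
  uint32_t DemandedOf2 = 0xFFFFu; // every bit of the i16 result is used
  uint32_t DemandedOf1 = demandedThroughTrunc16(DemandedOf2);
  uint32_t DemandedOfX = demandedThroughAdd(DemandedOf1);
  std::printf("demanded(%%1)=0x%x demanded(%%x)=0x%x\n",
              (unsigned)DemandedOf1, (unsigned)DemandedOfX); // both 0xffff
  return 0;
}
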
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 4826ac4..4040ad3 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
@@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() {
bool DependenceAnalysis::runOnFunction(Function &F) {
this->F = &F;
- AA = &getAnalysis<AliasAnalysis>();
- SE = &getAnalysis<ScalarEvolution>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() {
void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<AliasAnalysis>();
- AU.addRequiredTransitive<ScalarEvolution>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
@@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
: Dependence(Source, Destination), Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
- DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
+ if (CommonLevels)
+ DV = make_unique<DVEntry[]>(CommonLevels);
}
// The rest are simple getters that hide the implementation.
@@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
const Loop *CurLoop) {
Kind = Distance;
- A = SE->getConstant(D->getType(), 1);
+ A = SE->getOne(D->getType());
B = SE->getNegativeSCEV(A);
C = SE->getNegativeSCEV(D);
AssociatedLoop = CurLoop;
@@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,
if (!C1B2_C2B1 || !C1A2_C2A1 ||
!A1B2_A2B1 || !A2B1_A1B2)
return false;
- APInt Xtop = C1B2_C2B1->getValue()->getValue();
- APInt Xbot = A1B2_A2B1->getValue()->getValue();
- APInt Ytop = C1A2_C2A1->getValue()->getValue();
- APInt Ybot = A2B1_A1B2->getValue()->getValue();
+ APInt Xtop = C1B2_C2B1->getAPInt();
+ APInt Xbot = A1B2_A2B1->getAPInt();
+ APInt Ytop = C1A2_C2A1->getAPInt();
+ APInt Ybot = A2B1_A1B2->getAPInt();
DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
@@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,
}
if (const SCEVConstant *CUB =
collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
- APInt UpperBound = CUB->getValue()->getValue();
+ APInt UpperBound = CUB->getAPInt();
DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
X->setEmpty();
@@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
const Value *B) {
const Value *AObj = GetUnderlyingObject(A, DL);
const Value *BObj = GetUnderlyingObject(B, DL);
- return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
- BObj, AA->getTypeStoreSize(BObj->getType()));
+ return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()),
+ BObj, DL.getTypeStoreSize(BObj->getType()));
}
@@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
// Can we compute distance?
if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
- APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
- APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
APInt Distance = ConstDelta; // these need to be initialized
APInt Remainder = ConstDelta;
APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
@@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
// compute SplitIter for use by DependenceAnalysis::getSplitIteration()
- SplitIter =
- SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
- Delta),
- SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
- ConstCoeff));
+ SplitIter = SE->getUDivExpr(
+ SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
@@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
return true;
}
Result.DV[Level].Splitable = false;
- Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+ Result.DV[Level].Distance = SE->getZero(Delta->getType());
return false;
}
}
// check that Coeff divides Delta
- APInt APDelta = ConstDelta->getValue()->getValue();
- APInt APCoeff = ConstCoeff->getValue()->getValue();
+ APInt APDelta = ConstDelta->getAPInt();
+ APInt APCoeff = ConstCoeff->getAPInt();
APInt Distance = APDelta; // these need to be initialized
APInt Remainder = APDelta;
APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
@@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
// find gcd
APInt G, X, Y;
- APInt AM = ConstSrcCoeff->getValue()->getValue();
- APInt BM = ConstDstCoeff->getValue()->getValue();
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactSIVindependence;
++ExactSIVsuccesses;
@@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
// UM is perhaps unavailable, let's check
if (const SCEVConstant *CUB =
collectConstantUpperBound(CurLoop, Delta->getType())) {
- UM = CUB->getValue()->getValue();
+ UM = CUB->getAPInt();
DEBUG(dbgs() << "\t UM = " << UM << "\n");
UMvalid = true;
}
@@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
static
bool isRemainderZero(const SCEVConstant *Dividend,
const SCEVConstant *Divisor) {
- APInt ConstDividend = Dividend->getValue()->getValue();
- APInt ConstDivisor = Divisor->getValue()->getValue();
+ APInt ConstDividend = Dividend->getAPInt();
+ APInt ConstDivisor = Divisor->getAPInt();
return ConstDividend.srem(ConstDivisor) == 0;
}
@@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
- NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
- DstCoeff, Delta, CurLoop);
+ NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
+ CurLoop);
DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
if (Level < CommonLevels) {
@@ -1775,8 +1774,8 @@ bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
- Delta, CurLoop);
+ NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
+ CurLoop);
DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
if (Level < CommonLevels) {
@@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// find gcd
APInt G, X, Y;
- APInt AM = ConstSrcCoeff->getValue()->getValue();
- APInt BM = ConstDstCoeff->getValue()->getValue();
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactRDIVindependence;
return true;
@@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// SrcUM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(SrcLoop, Delta->getType())) {
- SrcUM = UpperBound->getValue()->getValue();
+ SrcUM = UpperBound->getAPInt();
DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
SrcUMvalid = true;
}
@@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// UM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(DstLoop, Delta->getType())) {
- DstUM = UpperBound->getValue()->getValue();
+ DstUM = UpperBound->getAPInt();
DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
DstUMvalid = true;
}
@@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
if (!Constant)
return false;
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
@@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
if (!Constant)
return false;
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
@@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
const SCEVConstant *ConstOp = getConstantPart(Product);
if (!ConstOp)
return false;
- APInt ConstOpValue = ConstOp->getValue()->getValue();
+ APInt ConstOpValue = ConstOp->getAPInt();
ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
ConstOpValue.abs());
}
@@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
}
if (!Constant)
return false;
- APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
if (ConstDelta == 0)
return false;
@@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
else
Constant = cast<SCEVConstant>(Coeff);
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
@@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
else
Constant = cast<SCEVConstant>(Coeff);
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
@@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
// or constant, in which case we give up on this direction.
continue;
}
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
if (RunningGCD != 0) {
@@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
// If the difference is 0, we won't need to know the number of iterations.
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
Bound[K].Lower[Dependence::DVEntry::ALL] =
- SE->getConstant(A[K].Coeff->getType(), 0);
+ SE->getZero(A[K].Coeff->getType());
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
Bound[K].Upper[Dependence::DVEntry::ALL] =
- SE->getConstant(A[K].Coeff->getType(), 0);
+ SE->getZero(A[K].Coeff->getType());
}
}
@@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
- const SCEV *Iter_1 =
- SE->getMinusSCEV(Bound[K].Iterations,
- SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
Bound[K].Lower[Dependence::DVEntry::LT] =
@@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
- const SCEV *Iter_1 =
- SE->getMinusSCEV(Bound[K].Iterations,
- SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
Bound[K].Lower[Dependence::DVEntry::GT] =
@@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
// X^+ = max(X, 0)
const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
- return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+ return SE->getSMaxExpr(X, SE->getZero(X->getType()));
}
// X^- = min(X, 0)
const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
- return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+ return SE->getSMinExpr(X, SE->getZero(X->getType()));
}
@@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo *
DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
bool SrcFlag,
const SCEV *&Constant) const {
- const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+ const SCEV *Zero = SE->getZero(Subscript->getType());
CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
for (unsigned K = 1; K <= MaxLevels; ++K) {
CI[K].Coeff = Zero;
@@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
- return SE->getConstant(Expr->getType(), 0);
+ return SE->getZero(Expr->getType());
if (AddRec->getLoop() == TargetLoop)
return AddRec->getStepRecurrence(*SE);
return findCoefficient(AddRec->getStart(), TargetLoop);
@@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Bconst || !Cconst) return false;
- APInt Beta = Bconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Beta = Bconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivB = Charlie.sdiv(Beta);
assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
const SCEV *AP_K = findCoefficient(Dst, CurLoop);
@@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
- APInt Alpha = Aconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
@@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
- APInt Alpha = Aconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
@@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
/// source and destination array references are recurrences on a nested loop,
/// this function flattens the nested recurrences into separate recurrences
/// for each loop level.
-bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
- const SCEV *DstSCEV,
- SmallVectorImpl<Subscript> &Pair,
- const SCEV *ElementSize) {
+bool DependenceAnalysis::tryDelinearize(Instruction *Src,
+ Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair) {
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+
+ // The code below mimics the code in Delinearization.cpp.
+ const SCEV *SrcAccessFn =
+ SE->getSCEVAtScope(SrcPtr, SrcLoop);
+ const SCEV *DstAccessFn =
+ SE->getSCEVAtScope(DstPtr, DstLoop);
+
const SCEVUnknown *SrcBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV));
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
if (!SrcBase || !DstBase || SrcBase != DstBase)
return false;
- SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase);
- DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase);
+ const SCEV *ElementSize = SE->getElementSize(Src);
+ if (ElementSize != SE->getElementSize(Dst))
+ return false;
+
+ const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
+ const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
@@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
}
#endif
-
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
@@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
Pair[0].Dst = DstSCEV;
}
- if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
- DEBUG(dbgs() << " delinerized GEP\n");
- Pairs = Pair.size();
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinerized GEP\n");
+ Pairs = Pair.size();
+ }
}
for (unsigned P = 0; P < Pairs; ++P) {
@@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
return nullptr;
}
- auto Final = make_unique<FullDependence>(Result);
- Result.DV = nullptr;
- return std::move(Final);
+ return make_unique<FullDependence>(std::move(Result));
}
@@ -3852,10 +3863,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
Pair[0].Dst = DstSCEV;
}
- if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
- DEBUG(dbgs() << " delinerized GEP\n");
- Pairs = Pair.size();
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinerized GEP\n");
+ Pairs = Pair.size();
+ }
}
for (unsigned P = 0; P < Pairs; ++P) {
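
Most of the DependenceAnalysis churn above is mechanical - SCEVConstant::getValue()->getValue() becomes getAPInt(), and getConstant(Ty, 0/1) becomes getZero/getOne - but the strong-SIV arithmetic it feeds is worth a worked example. With subscripts a*i + c1 and a*i + c2 in the same loop, the dependence distance is (c1 - c2)/a, and a nonzero remainder proves independence; a toy standalone version of the sdivrem step:

#include <cstdio>
#include <cstdlib>

int main() {
  long Coeff = 4;  // a, the common coefficient of the loop index
  long Delta = 12; // c1 - c2
  std::ldiv_t R = std::ldiv(Delta, Coeff); // mirrors APInt::sdivrem
  if (R.rem != 0)
    std::printf("independent: coefficient does not divide delta\n");
  else
    std::printf("distance = %ld\n", R.quot); // prints: distance = 3
  return 0;
}
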
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index e5ee295..5ae6d74 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines divergence analysis which determines whether a branch in a
-// GPU program is divergent. It can help branch optimizations such as jump
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent.It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
@@ -61,75 +61,31 @@
// 2. memory as black box. It conservatively considers values loaded from
// generic or local address as divergent. This can be improved by leveraging
// pointer analysis.
+//
//===----------------------------------------------------------------------===//
-#include <vector>
-#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "divergence"
-
-namespace {
-class DivergenceAnalysis : public FunctionPass {
-public:
- static char ID;
-
- DivergenceAnalysis() : FunctionPass(ID) {
- initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
- AU.setPreservesAll();
- }
-
- bool runOnFunction(Function &F) override;
-
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const override;
-
- // Returns true if V is divergent.
- bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
- // Returns true if V is uniform/non-divergent.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
-
-private:
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-};
-} // End of anonymous namespace
-
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-
namespace {
class DivergencePropagator {
public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI,
- DominatorTree &DT, PostDominatorTree &PDT,
- DenseSet<const Value *> &DV)
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
: F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
void populateWithSourcesOfDivergence();
void propagate();
@@ -140,7 +96,7 @@ private:
// A helper function that explores sync dependents of TI.
void exploreSyncDependency(TerminatorInst *TI);
// Computes the influence region from Start to End. This region includes all
- // basic blocks on any path from Start to End.
+ // basic blocks on any simple path from Start to End.
void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
DenseSet<BasicBlock *> &InfluenceRegion);
// Finds all users of I that are outside the influence region, and add these
@@ -153,13 +109,13 @@ private:
DominatorTree &DT;
PostDominatorTree &PDT;
std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
};
void DivergencePropagator::populateWithSourcesOfDivergence() {
Worklist.clear();
DV.clear();
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
Worklist.push_back(&I);
DV.insert(&I);
@@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
// A PHINode is uniform if it returns the same value no matter which path is
// taken.
- if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
- Worklist.push_back(I);
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
}
// Propagation rule 2: if a value defined in a loop is used outside, the user
@@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion(
}
}
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
void DivergencePropagator::computeInfluenceRegion(
BasicBlock *Start, BasicBlock *End,
DenseSet<BasicBlock *> &InfluenceRegion) {
assert(PDT.properlyDominates(End, Start) &&
"End does not properly dominate Start");
+
+ // The influence region starts from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is in
+ // a loop that doesn't contain "End".
std::vector<BasicBlock *> InfluenceStack;
- InfluenceStack.push_back(Start);
- InfluenceRegion.insert(Start);
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
while (!InfluenceStack.empty()) {
BasicBlock *BB = InfluenceStack.back();
InfluenceStack.pop_back();
- for (BasicBlock *Succ : successors(BB)) {
- if (End != Succ && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
- }
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
}
}
@@ -286,10 +254,25 @@ void DivergencePropagator::propagate() {
} /// end namespace anonymous
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
FunctionPass *llvm::createDivergenceAnalysisPass() {
return new DivergenceAnalysis();
}
+void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+}
+
bool DivergenceAnalysis::runOnFunction(Function &F) {
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
if (TTIWP == nullptr)
@@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.count(&Arg))
OS << "DIVERGENT: " << Arg << "\n";
}
- // Iterate instructions using inst_range to ensure a deterministic order.
- for (auto &I : inst_range(F)) {
+ // Iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
if (DivergentValues.count(&I))
OS << "DIVERGENT:" << I << "\n";
}
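
The rewritten computeInfluenceRegion above no longer seeds the region with "Start" itself: only successors are pushed, so "Start" joins the region solely through a back edge. Below is a minimal standalone sketch of that walk, using plain STL containers in place of LLVM's BasicBlock and DenseSet; the file and function names are illustrative, not from the tree.

// influence_region.cpp -- toy model of DivergencePropagator::computeInfluenceRegion.
#include <iostream>
#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::vector<int>>;

// Mirrors addSuccessorsToInfluenceRegion: push successors of BB, stopping at End.
static void addSuccessors(const Graph &Succ, int BB, int End,
                          std::set<int> &Region, std::vector<int> &Stack) {
  auto It = Succ.find(BB);
  if (It == Succ.end())
    return;
  for (int S : It->second)
    if (S != End && Region.insert(S).second)
      Stack.push_back(S);
}

// Collects every block on a path strictly between Start and its post-dominator End.
static std::set<int> influenceRegion(const Graph &Succ, int Start, int End) {
  std::set<int> Region;
  std::vector<int> Stack;
  addSuccessors(Succ, Start, End, Region, Stack); // seed with successors, not Start
  while (!Stack.empty()) {
    int BB = Stack.back();
    Stack.pop_back();
    addSuccessors(Succ, BB, End, Region, Stack);
  }
  return Region;
}

int main() {
  // Diamond CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}; divergent branch at 0,
  // immediate post-dominator 3. The region is {1, 2}; 0 and 3 stay outside.
  Graph Succ = {{0, {1, 2}}, {1, {3}}, {2, {3}}};
  for (int BB : influenceRegion(Succ, /*Start=*/0, /*End=*/3))
    std::cout << BB << "\n";
}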
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
new file mode 100644
index 0000000..01be8b3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -0,0 +1,106 @@
+//===- EHPersonalities.cpp - Compute EH-related information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F =
+ Pers ? dyn_cast<Function>(Pers->stripPointerCasts()) : nullptr;
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Case("ProcessCLRException", EHPersonality::CoreCLR)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
+ EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+ // function wants to catch asynch exceptions. The nounwind attribute only
+ // implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
+
+DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> Worklist;
+ BasicBlock *EntryBlock = &F.getEntryBlock();
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+
+ // Build up the color map, which maps each block to its set of 'colors'.
+ // For any block B the "colors" of B are the set of funclets F (possibly
+ // including a root "funclet" representing the main function) such that
+ // F will need to directly contain B or a copy of B (where the term "directly
+ // contain" is used to distinguish from being "transitively contained" in
+ // a nested funclet).
+ //
+ // Note: Despite not being a funclet in the truest sense, a catchswitch is
+ // considered to belong to its own funclet for the purposes of coloring.
+
+ DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for "
+ << F.getName() << "\n");
+
+ Worklist.push_back({EntryBlock, EntryBlock});
+
+ while (!Worklist.empty()) {
+ BasicBlock *Visiting;
+ BasicBlock *Color;
+ std::tie(Visiting, Color) = Worklist.pop_back_val();
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << "Visiting " << Visiting->getName() << ", "
+ << Color->getName() << "\n");
+ Instruction *VisitingHead = Visiting->getFirstNonPHI();
+ if (VisitingHead->isEHPad()) {
+ // Mark this funclet head as a member of itself.
+ Color = Visiting;
+ }
+ // Note that this is a member of the given color.
+ ColorVector &Colors = BlockColors[Visiting];
+ if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ Colors.push_back(Color);
+ else
+ continue;
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << Color->getName()
+ << "\' to block \'" << Visiting->getName()
+ << "\'.\n");
+
+ BasicBlock *SuccColor = Color;
+ TerminatorInst *Terminator = Visiting->getTerminator();
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
+ Value *ParentPad = CatchRet->getParentPad();
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccColor = EntryBlock;
+ else
+ SuccColor = cast<Instruction>(ParentPad)->getParent();
+ }
+
+ for (BasicBlock *Succ : successors(Visiting))
+ Worklist.push_back({Succ, SuccColor});
+ }
+ return BlockColors;
+}
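
colorEHFunclets above is a depth-first worklist flood-fill: each block accumulates the funclet heads ("colors") that must directly contain a copy of it, and a block that already holds a color is not re-expanded. The following is a hedged standalone sketch of the same loop on a toy CFG, with ints for blocks and a flag standing in for "first non-PHI instruction is an EH pad"; the catchret parent-pad color reset is omitted for brevity, and all names are illustrative.

// funclet_colors.cpp -- toy model of the worklist loop in colorEHFunclets.
#include <algorithm>
#include <iostream>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

struct Block {
  std::vector<int> Succs;
  bool IsFuncletHead; // stands in for "first non-PHI instruction is an EH pad"
};

std::map<int, std::vector<int>> colorBlocks(const std::map<int, Block> &CFG,
                                            int Entry) {
  std::map<int, std::vector<int>> Colors;
  std::vector<std::pair<int, int>> Worklist = {{Entry, Entry}};
  while (!Worklist.empty()) {
    int Visiting, Color;
    std::tie(Visiting, Color) = Worklist.back();
    Worklist.pop_back();
    if (CFG.at(Visiting).IsFuncletHead)
      Color = Visiting; // a funclet head is a member of itself
    std::vector<int> &CV = Colors[Visiting];
    if (std::find(CV.begin(), CV.end(), Color) != CV.end())
      continue; // color already recorded; don't re-walk successors
    CV.push_back(Color);
    for (int S : CFG.at(Visiting).Succs)
      Worklist.push_back({S, Color});
  }
  return Colors;
}

int main() {
  // 0 (entry) branches to 1 (a funclet head) and 2; 1 also reaches 2, so 2
  // ends up with two colors, as a block shared by two funclets would.
  std::map<int, Block> CFG = {{0, {{1, 2}, false}},
                              {1, {{2}, true}},
                              {2, {{}, false}}};
  for (const auto &Entry : colorBlocks(CFG, 0)) {
    std::cout << Entry.first << ":";
    for (int C : Entry.second)
      std::cout << " " << C;
    std::cout << "\n";
  }
}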
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
new file mode 100644
index 0000000..ab2263a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -0,0 +1,1002 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "globalsmodref-aa"
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+// An option to enable unsafe alias results from the GlobalsModRef analysis.
+// When enabled, GlobalsModRef will provide no-alias results which in extremely
+// rare cases may not be conservatively correct. In particular, in the face of
+// transforms which cause asymmetry between how effective GetUnderlyingObject
+// is for two pointers, it may produce incorrect results.
+//
+// These unsafe results have been returned by GMR for many years without
+// causing significant issues in the wild and so we provide a mechanism to
+// re-enable them for users of LLVM that have a particular performance
+// sensitivity and no known issues. The option also makes it easy to evaluate
+// the performance impact of these results.
+static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults(
+ "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden);
+
+/// The mod/ref information collected for a particular function.
+///
+/// We collect information about mod/ref behavior of a function here, both in
+/// general and as pertains to specific globals. We only have this detailed
+/// information when we know *something* useful about the behavior. If we
+/// saturate to fully general mod/ref, we remove the info for the function.
+class GlobalsAAResult::FunctionInfo {
+ typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType;
+
+ /// Build a wrapper struct that has 8-byte alignment. All heap allocations
+ /// should provide this much alignment at least, but this makes it clear we
+ /// specifically rely on this amount of alignment.
+ struct LLVM_ALIGNAS(8) AlignedMap {
+ AlignedMap() {}
+ AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
+ GlobalInfoMapType Map;
+ };
+
+ /// Pointer traits for our aligned map.
+ struct AlignedMapPointerTraits {
+ static inline void *getAsVoidPointer(AlignedMap *P) { return P; }
+ static inline AlignedMap *getFromVoidPointer(void *P) {
+ return (AlignedMap *)P;
+ }
+ enum { NumLowBitsAvailable = 3 };
+ static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ "AlignedMap insufficiently aligned to have enough low bits.");
+ };
+
+ /// The bit that flags that this function may read any global. This is
+ /// chosen to mix together with ModRefInfo bits.
+ enum { MayReadAnyGlobal = 4 };
+
+ /// Checks to document the invariants of the bit packing here.
+ static_assert((MayReadAnyGlobal & MRI_ModRef) == 0,
+ "ModRef and the MayReadAnyGlobal flag bits overlap.");
+ static_assert(((MayReadAnyGlobal | MRI_ModRef) >>
+ AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
+ "Insufficient low bits to store our flag and ModRef info.");
+
+public:
+ FunctionInfo() : Info() {}
+ ~FunctionInfo() {
+ delete Info.getPointer();
+ }
+ // Spell out the copy and move constructors and assignment operators to get
+ // deep copy semantics and correct move semantics in the face of the
+ // pointer-int pair.
+ FunctionInfo(const FunctionInfo &Arg)
+ : Info(nullptr, Arg.Info.getInt()) {
+ if (const auto *ArgPtr = Arg.Info.getPointer())
+ Info.setPointer(new AlignedMap(*ArgPtr));
+ }
+ FunctionInfo(FunctionInfo &&Arg)
+ : Info(Arg.Info.getPointer(), Arg.Info.getInt()) {
+ Arg.Info.setPointerAndInt(nullptr, 0);
+ }
+ FunctionInfo &operator=(const FunctionInfo &RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(nullptr, RHS.Info.getInt());
+ if (const auto *RHSPtr = RHS.Info.getPointer())
+ Info.setPointer(new AlignedMap(*RHSPtr));
+ return *this;
+ }
+ FunctionInfo &operator=(FunctionInfo &&RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt());
+ RHS.Info.setPointerAndInt(nullptr, 0);
+ return *this;
+ }
+
+ /// Returns the \c ModRefInfo info for this function.
+ ModRefInfo getModRefInfo() const {
+ return ModRefInfo(Info.getInt() & MRI_ModRef);
+ }
+
+ /// Adds new \c ModRefInfo for this function to its state.
+ void addModRefInfo(ModRefInfo NewMRI) {
+ Info.setInt(Info.getInt() | NewMRI);
+ }
+
+ /// Returns whether this function may read any global variable, and we don't
+ /// know which global.
+ bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; }
+
+ /// Sets this function as potentially reading from any global.
+ void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); }
+
+ /// Returns the \c ModRefInfo info for this function w.r.t. a particular
+ /// global, which may be more precise than the general information above.
+ ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const {
+ ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef;
+ if (AlignedMap *P = Info.getPointer()) {
+ auto I = P->Map.find(&GV);
+ if (I != P->Map.end())
+ GlobalMRI = ModRefInfo(GlobalMRI | I->second);
+ }
+ return GlobalMRI;
+ }
+
+ /// Add mod/ref info from another function into ours, saturating towards
+ /// MRI_ModRef.
+ void addFunctionInfo(const FunctionInfo &FI) {
+ addModRefInfo(FI.getModRefInfo());
+
+ if (FI.mayReadAnyGlobal())
+ setMayReadAnyGlobal();
+
+ if (AlignedMap *P = FI.Info.getPointer())
+ for (const auto &G : P->Map)
+ addModRefInfoForGlobal(*G.first, G.second);
+ }
+
+ void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) {
+ AlignedMap *P = Info.getPointer();
+ if (!P) {
+ P = new AlignedMap();
+ Info.setPointer(P);
+ }
+ auto &GlobalMRI = P->Map[&GV];
+ GlobalMRI = ModRefInfo(GlobalMRI | NewMRI);
+ }
+
+ /// Clear a global's ModRef info. Should be used when a global is being
+ /// deleted.
+ void eraseModRefInfoForGlobal(const GlobalValue &GV) {
+ if (AlignedMap *P = Info.getPointer())
+ P->Map.erase(&GV);
+ }
+
+private:
+ /// All of the information is encoded into a single pointer, with a three bit
+ /// integer in the low three bits. The high bit provides a flag for when this
+ /// function may read any global. The low two bits are the ModRefInfo. And
+ /// the pointer, when non-null, points to a map from GlobalValue to
+ /// ModRefInfo specific to that GlobalValue.
+ PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info;
+};
+
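+
FunctionInfo above packs the map pointer and three bits of state (mod/ref in bits 0-1, the MayReadAnyGlobal flag in bit 2) into one word, relying on the 8-byte alignment the static_asserts document. As a hedged illustration of the same trick, here is a hand-rolled, simplified stand-in for PointerIntPair; it is a sketch only, not how LLVM implements the template.

// packed_ptr.cpp -- the low-bit packing FunctionInfo relies on, by hand.
#include <cassert>
#include <cstdint>
#include <iostream>

struct alignas(8) Payload { int Data; };

class PackedPtr {
  uintptr_t Bits = 0; // [ pointer bits | 3 low flag bits ]
  static constexpr uintptr_t IntMask = 0x7;

public:
  void setPointer(Payload *P) {
    uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
    assert((Raw & IntMask) == 0 && "pointer not 8-byte aligned");
    Bits = Raw | (Bits & IntMask);
  }
  Payload *getPointer() const {
    return reinterpret_cast<Payload *>(Bits & ~IntMask);
  }
  void setInt(unsigned I) {
    assert(I <= IntMask && "flags need at most 3 bits");
    Bits = (Bits & ~IntMask) | I;
  }
  unsigned getInt() const { return static_cast<unsigned>(Bits & IntMask); }
};

int main() {
  Payload P{42};
  PackedPtr PP;
  PP.setPointer(&P);
  PP.setInt(4 | 1); // MayReadAnyGlobal (4) plus a ref bit, as in the class above
  std::cout << PP.getPointer()->Data << " flags=" << PP.getInt() << "\n";
}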
+void GlobalsAAResult::DeletionCallbackHandle::deleted() {
+ Value *V = getValPtr();
+ if (auto *F = dyn_cast<Function>(V))
+ GAR->FunctionInfos.erase(F);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (GAR->NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and
+ // remove any AllocRelatedValues for it.
+ if (GAR->IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (auto I = GAR->AllocsForIndirectGlobals.begin(),
+ E = GAR->AllocsForIndirectGlobals.end();
+ I != E; ++I)
+ if (I->second == GV)
+ GAR->AllocsForIndirectGlobals.erase(I);
+ }
+
+ // Scan the function info we have collected and remove this global
+ // from all of them.
+ for (auto &FIPair : GAR->FunctionInfos)
+ FIPair.second.eraseModRefInfoForGlobal(*GV);
+ }
+ }
+
+ // If this is an allocation related to an indirect global, remove it.
+ GAR->AllocsForIndirectGlobals.erase(V);
+
+ // And clear out the handle.
+ setValPtr(nullptr);
+ GAR->Handles.erase(I);
+ // This object is now destroyed!
+}
+
+FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+FunctionModRefBehavior
+GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ if (const Function *F = CS.getCalledFunction())
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the function info for the function, or null if we don't have
+/// anything useful to say about it.
+GlobalsAAResult::FunctionInfo *
+GlobalsAAResult::getFunctionInfo(const Function *F) {
+ auto I = FunctionInfos.find(F);
+ if (I != FunctionInfos.end())
+ return &I->second;
+ return nullptr;
+}
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsAAResult::AnalyzeGlobals(Module &M) {
+ SmallPtrSet<Function *, 64> TrackedFunctions;
+ for (Function &F : M)
+ if (F.hasLocalLinkage())
+ if (!AnalyzeUsesOfPointer(&F)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(&F);
+ TrackedFunctions.insert(&F);
+ Handles.emplace_front(*this, &F);
+ Handles.front().I = Handles.begin();
+ ++NumNonAddrTakenFunctions;
+ }
+
+ SmallPtrSet<Function *, 64> Readers, Writers;
+ for (GlobalVariable &GV : M.globals())
+ if (GV.hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(&GV, &Readers,
+ GV.isConstant() ? nullptr : &Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(&GV);
+ Handles.emplace_front(*this, &GV);
+ Handles.front().I = Handles.begin();
+
+ for (Function *Reader : Readers) {
+ if (TrackedFunctions.insert(Reader).second) {
+ Handles.emplace_front(*this, Reader);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+ }
+
+ if (!GV.isConstant()) // No need to keep track of writers to constants
+ for (Function *Writer : Writers) {
+ if (TrackedFunctions.insert(Writer).second) {
+ Handles.emplace_front(*this, Writer);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+ }
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (GV.getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(&GV))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear();
+ Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
+ SmallPtrSetImpl<Function *> *Readers,
+ SmallPtrSetImpl<Function *> *Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy())
+ return true;
+
+ for (Use &U : V->uses()) {
+ User *I = U.getUser();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (Readers)
+ Readers->insert(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (V == SI->getOperand(1)) {
+ if (Writers)
+ Writers->insert(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers))
+ return true;
+ } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (auto CS = CallSite(I)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ if (CS.isDataOperand(&U)) {
+ // Detect calls to free.
+ if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Writers)
+ Writers->insert(CS->getParent()->getParent());
+ } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
+ Function *ParentF = CS->getParent()->getParent();
+ // A nocapture argument may be read from or written to, but does not
+ // escape unless the call can somehow recurse.
+ //
+ // nocapture "indicates that the callee does not make any copies of
+ // the pointer that outlive itself". Therefore if we directly or
+ // indirectly recurse, we must treat the pointer as escaping.
+ if (FunctionToSCCMap[ParentF] ==
+ FunctionToSCCMap[CS.getCalledFunction()])
+ return true;
+ if (Readers)
+ Readers->insert(ParentF);
+ if (Writers)
+ Writers->insert(ParentF);
+ } else {
+ return true; // Argument of an unknown call.
+ }
+ // If the Callee is not ReadNone, it may read the global,
+ // and if it is not ReadOnly, it may also write to it.
+ Function *CalleeF = CS.getCalledFunction();
+ if (!CalleeF->doesNotAccessMemory()) {
+ if (Readers)
+ Readers->insert(CalleeF);
+ if (Writers && !CalleeF->onlyReadsMemory())
+ Writers->insert(CalleeF);
+ }
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+ // Keep track of values related to the allocation of the memory, e.g. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value *> AllocRelatedValues;
+
+ // If the initializer is a valid pointer, bail.
+ if (Constant *C = GV->getInitializer())
+ if (!C->isNullValue())
+ return false;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (User *U : GV->users()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading and storing to it. We do *not* allow
+ // storing the loaded pointer somewhere else or passing to a function.
+ if (AnalyzeUsesOfPointer(LI))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV)
+ return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
+
+ if (!isAllocLikeFn(Ptr, &TLI))
+ return false; // Too hard to analyze.
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+ GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ Handles.emplace_front(*this, AllocRelatedValues.back());
+ Handles.front().I = Handles.begin();
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ Handles.emplace_front(*this, GV);
+ Handles.front().I = Handles.begin();
+ return true;
+}
+
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ unsigned SCCID = 0;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ for (auto *CGN : SCC)
+ if (Function *F = CGN->getFunction())
+ FunctionToSCCMap[F] = SCCID;
+ ++SCCID;
+ }
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) {
+ // Calls externally or is weak - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+ bool KnowNothing = false;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FI.addModRefInfo(MRI_Ref);
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FI.setMayReadAnyGlobal();
+ } else if (F->onlyAccessesArgMemory() ||
+ F->onlyAccessesInaccessibleMemOrArgMem()) {
+ // This function may only access (read/write) memory pointed to by its
+ // arguments. If this pointer is to a global, this escaping use of the
+ // pointer is captured in AnalyzeUsesOfPointer().
+ FI.addModRefInfo(MRI_ModRef);
+ } else {
+ FI.addModRefInfo(MRI_ModRef);
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FI.addFunctionInfo(*CalleeFI);
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfos map.
+ if (KnowNothing) {
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (auto *Node : SCC) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+ for (Instruction &I : instructions(Node->getFunction())) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+
+ // We handle calls specially because the graph-relevant aspects are
+ // handled above.
+ if (auto CS = CallSite(&I)) {
+ if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ // FIXME: It is completely unclear why this is necessary and not
+ // handled by the above graph code.
+ FI.addModRefInfo(MRI_ModRef);
+ } else if (Function *Callee = CS.getCalledFunction()) {
+ // The callgraph doesn't include intrinsic calls.
+ if (Callee->isIntrinsic()) {
+ FunctionModRefBehavior Behaviour =
+ AAResultBase::getModRefBehavior(Callee);
+ FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+ }
+ }
+ continue;
+ }
+
+ // For all non-call instructions, we use the primary predicates for whether
+ // they read or write memory.
+ if (I.mayReadFromMemory())
+ FI.addModRefInfo(MRI_Ref);
+ if (I.mayWriteToMemory())
+ FI.addModRefInfo(MRI_Mod);
+ }
+ }
+
+ if ((FI.getModRefInfo() & MRI_Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FI.getModRefInfo() == MRI_NoModRef)
+ ++NumNoMemFunctions;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+ // get invalidated if DenseMap decides to re-hash.
+ FunctionInfo CachedFI = FI;
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+ }
+}
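
The propagation above is a join over a small lattice, computed with bitwise OR and checked for early saturation at MRI_ModRef twice per SCC scan. A minimal numeric illustration follows; the concrete values (Ref=1, Mod=2, ModRef=3) match what the bit-packing comments and static_asserts earlier in this file imply, but treat the exact numbers as an assumption of this sketch.

// modref_join.cpp -- the saturating mod/ref join used per SCC above.
#include <iostream>

enum : unsigned { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

int main() {
  unsigned FI = NoModRef;
  const bool Reads[] = {true, false, true};
  const bool Writes[] = {false, true, false};
  for (int I = 0; I < 3; ++I) {
    if (Reads[I])
      FI |= Ref; // join towards the top of the lattice
    if (Writes[I])
      FI |= Mod;
    if (FI == ModRef) { // saturated: scanning more instructions can't add info
      std::cout << "saturated after instruction " << I << "\n";
      break;
    }
  }
  std::cout << "final mod/ref bits: " << FI << "\n";
}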
+
+// GV is a non-escaping global. V is a pointer address that has been loaded from.
+// If we can prove that V must escape, we can conclude that a load from V cannot
+// alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+ const Value *V,
+ int &Depth,
+ const DataLayout &DL) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input))
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ //
+ // (Transitive) loads from a global are also safe - if this aliased
+ // another global, its address would escape, so no alias.
+ continue;
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4; lower numbers could be used to fix compile time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ return false;
+ } while (!Inputs.empty());
+
+ // All inputs were known to be no-alias.
+ return true;
+}
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would have
+// changed program behavior by introducing the observed escape, making it an
+// invalid transform.
+//
+// This property does mean that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, cannot rely
+// on the results of GMR::alias. This seems a reasonable restriction, although
+// currently there is no way to enforce it. There is also no realistic
+// optimization pass that would make this mistake. The closest example is
+// a transformation pass which does reg2mem of SSA values but stores them into
+// global variables temporarily before restoring the global variable's value.
+// This could be useful to expose "benign" races for example. However, it seems
+// reasonable to require that a pass which introduces escapes of global
+// variables in this way either not trust AA results while the escape is
+// active, or be forced to operate as a module pass that cannot co-exist
+// with an alias analysis such as GMR.
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+ const Value *V) {
+ // In order to know that the underlying object cannot alias the
+ // non-addr-taken global, we must know that it would have to be an escape.
+ // Thus if the underlying object is a function argument, a load from
+ // a global, or the return of a function, it cannot alias. We can also
+ // recurse through PHI nodes and select nodes provided all of their inputs
+ // resolve to one of these known-escaping roots.
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
+ int Depth = 0;
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+ // If one input is the very global we're querying against, then we can't
+ // conclude no-alias.
+ if (InputGV == GV)
+ return false;
+
+ // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+ // FIXME: The condition can be refined, but be conservative for now.
+ auto *GVar = dyn_cast<GlobalVariable>(GV);
+ auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+ if (GVar && InputGVar &&
+ !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+ !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+ Type *GVType = GVar->getInitializer()->getType();
+ Type *InputGVType = InputGVar->getInitializer()->getType();
+ if (GVType->isSized() && InputGVType->isSized() &&
+ (DL.getTypeAllocSize(GVType) > 0) &&
+ (DL.getTypeAllocSize(InputGVType) > 0))
+ continue;
+ }
+
+ // Conservatively return false, even though we could be smarter
+ // (e.g. look through GlobalAliases).
+ return false;
+ }
+
+ if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input)) {
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ continue;
+ }
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4; lower numbers could be used to fix compile time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ // A pointer loaded from a global would have been captured, and we know
+ // that the global is non-escaping, so no alias.
+ const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+ if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+ // The load does not alias with GV.
+ continue;
+ // Otherwise, a load could come from anywhere, so bail.
+ return false;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ // FIXME: It would be good to handle other obvious no-alias cases here, but
+ // it isn't clear how to do so reasonably without building a small version
+ // of BasicAA into this code. We could recurse into AAResultBase::alias
+ // here but that seems likely to go poorly as we're inside the
+ // implementation of such a query. Until then, just conservatively return
+ // false.
+ return false;
+ } while (!Inputs.empty());
+
+ // If all the inputs to V were definitively no-alias, then V is no-alias.
+ return true;
+}
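
Both escape walks above share one control-flow shape: a depth-limited worklist that strips values to their underlying objects, recurses through loads, selects and PHIs, and defaults to "may alias" on anything unrecognized or once the depth budget of 4 is spent. A compact standalone model of that shape on a toy value graph follows; the node kinds stand in for the IR cases, and every name is illustrative.

// escape_walk.cpp -- shape of the depth-limited no-alias walk above, on a toy
// value graph instead of LLVM IR. Kinds mirror the cases in the real code.
#include <iostream>
#include <set>
#include <vector>

struct Node {
  enum Kind { EscapingRoot, QueriedGlobal, SelectOrPhi, Unknown } K;
  std::vector<const Node *> Ops; // inputs for SelectOrPhi
};

static bool definitelyNoAlias(const Node *V) {
  std::set<const Node *> Visited{V};
  std::vector<const Node *> Inputs{V};
  int Depth = 0;
  do {
    const Node *In = Inputs.back();
    Inputs.pop_back();
    switch (In->K) {
    case Node::EscapingRoot:  // argument / call result / load from a global
      continue;               // inherently escaping => cannot be our global
    case Node::QueriedGlobal: // resolved to the very global we query against
      return false;
    case Node::SelectOrPhi:
      if (++Depth > 4)        // same arbitrary recursion budget as above
        return false;
      for (const Node *Op : In->Ops)
        if (Visited.insert(Op).second)
          Inputs.push_back(Op);
      continue;
    case Node::Unknown:
      return false;           // conservatively give up
    }
  } while (!Inputs.empty());
  return true; // every root was a known escape, so no alias with the global
}

int main() {
  Node Arg{Node::EscapingRoot, {}};
  Node Call{Node::EscapingRoot, {}};
  Node Phi{Node::SelectOrPhi, {&Arg, &Call}};
  std::cout << (definitelyNoAlias(&Phi) ? "no-alias" : "may-alias") << "\n";
}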
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1))
+ GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2))
+ GV2 = nullptr;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals we know these can't alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is and the other isn't, it isn't strictly safe but we can fake
+ // this result if necessary for performance. This does not appear to be
+ // a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Check for a special case where a non-escaping global can be used to
+ // conclude no-alias.
+ if ((GV1 || GV2) && GV1 != GV2) {
+ const GlobalValue *GV = GV1 ? GV1 : GV2;
+ const Value *UV = GV1 ? UV2 : UV1;
+ if (isNonEscapingGlobalNoAlias(GV, UV))
+ return NoAlias;
+ }
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = nullptr;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (!GV1)
+ GV1 = AllocsForIndirectGlobals.lookup(UV1);
+ if (!GV2)
+ GV2 = AllocsForIndirectGlobals.lookup(UV2);
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If the pointers are based on
+ // different indirect globals they cannot alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is based on an indirect global and the other isn't, it isn't
+ // strictly safe but we can fake this result if necessary for performance.
+ // This does not appear to be a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AAResultBase::alias(LocA, LocB);
+}
+
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ const GlobalValue *GV) {
+ if (CS.doesNotAccessMemory())
+ return MRI_NoModRef;
+ ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+ // Iterate through all the arguments to the called function. If any argument
+ // is based on GV, return the conservative result.
+ for (auto &A : CS.args()) {
+ SmallVector<Value*, 4> Objects;
+ GetUnderlyingObjects(A, Objects, DL);
+
+ // All objects must be identified.
+ if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject))
+ return ConservativeResult;
+
+ if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ return ConservativeResult;
+ }
+
+ // We identified all objects in the argument list, and none of them were GV.
+ return MRI_NoModRef;
+}
+
+ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ unsigned Known = MRI_ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionInfo *FI = getFunctionInfo(F))
+ Known = FI->getModRefInfoForGlobal(*GV) |
+ getModRefInfoForArgument(CS, GV);
+
+ if (Known == MRI_NoModRef)
+ return MRI_NoModRef; // No need to query other mod/ref analyses
+ return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+}
+
+GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
+ const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI), DL(DL) {}
+
+GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL),
+ NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
+ IndirectGlobals(std::move(Arg.IndirectGlobals)),
+ AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
+ FunctionInfos(std::move(Arg.FunctionInfos)),
+ Handles(std::move(Arg.Handles)) {
+ // Update the parent for each DeletionCallbackHandle.
+ for (auto &H : Handles) {
+ assert(H.GAR == &Arg);
+ H.GAR = this;
+ }
+}
+
+/*static*/ GlobalsAAResult
+GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+ CallGraph &CG) {
+ GlobalsAAResult Result(M.getDataLayout(), TLI);
+
+ // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+ Result.CollectSCCMembership(CG);
+
+ // Find non-addr taken globals.
+ Result.AnalyzeGlobals(M);
+
+ // Propagate on CG.
+ Result.AnalyzeCallGraph(CG, M);
+
+ return Result;
+}
+
+GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) {
+ return GlobalsAAResult::analyzeModule(M,
+ AM->getResult<TargetLibraryAnalysis>(M),
+ AM->getResult<CallGraphAnalysis>(M));
+}
+
+char GlobalsAA::PassID;
+
+char GlobalsAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+
+ModulePass *llvm::createGlobalsAAWrapperPass() {
+ return new GlobalsAAWrapperPass();
+}
+
+GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
+ initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+ Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
+ M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+ return false;
+}
+
+bool GlobalsAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
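
Besides the legacy and new-pass-manager wrappers registered above, the rewritten file exposes a result object that can be driven directly through analyzeModule. The following is a hedged sketch, not from this commit, of what a standalone tool might do against an LLVM tree of roughly this revision (r256633); it assumes the module has already been loaded, elides error handling, and uses only entry points visible in this diff plus standard analysis setup.

// Hypothetical driver: exercising GlobalsAAResult outside the pass managers.
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
using namespace llvm;

AliasResult queryGlobals(Module &M, const Value *P1, const Value *P2) {
  TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
  TargetLibraryInfo TLI(TLII);
  CallGraph CG(M);
  // analyzeModule collects SCC membership, non-addr-taken globals, and the
  // per-function mod/ref info, exactly as runOnModule does above.
  GlobalsAAResult GMR = GlobalsAAResult::analyzeModule(M, TLI, CG);
  // Sizes are unknown here; MemoryLocation defaults to UnknownSize.
  return GMR.alias(MemoryLocation(P1), MemoryLocation(P2));
}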
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
deleted file mode 100644
index 28fb49c..0000000
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass provides alias and mod/ref information for global values
-// that do not have their address taken, and keeps track of whether functions
-// read or write memory (are "pure"). For this simple (but very common) case,
-// we can provide pretty accurate and useful information.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include <set>
-using namespace llvm;
-
-#define DEBUG_TYPE "globalsmodref-aa"
-
-STATISTIC(NumNonAddrTakenGlobalVars,
- "Number of global vars without address taken");
-STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
-STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
-STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
-STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
-
-namespace {
-/// FunctionRecord - One instance of this structure is stored for every
-/// function in the program. Later, the entries for these functions are
-/// removed if the function is found to call an external function (in which
-/// case we know nothing about it.
-struct FunctionRecord {
- /// GlobalInfo - Maintain mod/ref info for all of the globals without
- /// addresses taken that are read or written (transitively) by this
- /// function.
- std::map<const GlobalValue *, unsigned> GlobalInfo;
-
- /// MayReadAnyGlobal - May read global variables, but it is not known which.
- bool MayReadAnyGlobal;
-
- unsigned getInfoForGlobal(const GlobalValue *GV) const {
- unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<const GlobalValue *, unsigned>::const_iterator I =
- GlobalInfo.find(GV);
- if (I != GlobalInfo.end())
- Effect |= I->second;
- return Effect;
- }
-
- /// FunctionEffect - Capture whether or not this function reads or writes to
- /// ANY memory. If not, we can do a lot of aggressive analysis on it.
- unsigned FunctionEffect;
-
- FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
-};
-
-/// GlobalsModRef - The actual analysis pass.
-class GlobalsModRef : public ModulePass, public AliasAnalysis {
- /// NonAddressTakenGlobals - The globals that do not have their addresses
- /// taken.
- std::set<const GlobalValue *> NonAddressTakenGlobals;
-
- /// IndirectGlobals - The memory pointed to by this global is known to be
- /// 'owned' by the global.
- std::set<const GlobalValue *> IndirectGlobals;
-
- /// AllocsForIndirectGlobals - If an instruction allocates memory for an
- /// indirect global, this map indicates which one.
- std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
-
- /// FunctionInfo - For each function, keep track of what globals are
- /// modified or read.
- std::map<const Function *, FunctionRecord> FunctionInfo;
-
-public:
- static char ID;
- GlobalsModRef() : ModulePass(ID) {
- initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout());
-
- // Find non-addr taken globals.
- AnalyzeGlobals(M);
-
- // Propagate on CG.
- AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<CallGraphWrapperPass>();
- AU.setPreservesAll(); // Does not transform code
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1, CS2);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(const Function *F) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
-
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (const Function *F = CS.getCalledFunction())
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
-
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
- }
-
- void deleteValue(Value *V) override;
- void addEscapingUse(Use &U) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis *)this;
- return this;
- }
-
-private:
- /// getFunctionInfo - Return the function info for the function, or null if
- /// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(const Function *F) {
- std::map<const Function *, FunctionRecord>::iterator I =
- FunctionInfo.find(F);
- if (I != FunctionInfo.end())
- return &I->second;
- return nullptr;
- }
-
- void AnalyzeGlobals(Module &M);
- void AnalyzeCallGraph(CallGraph &CG, Module &M);
- bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers,
- std::vector<Function *> &Writers,
- GlobalValue *OkayStoreDest = nullptr);
- bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
-};
-}
-
-char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
- "Simple mod/ref analysis for globals", false, true,
- false)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
- "Simple mod/ref analysis for globals", false, true,
- false)
-
-Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
-
-/// AnalyzeGlobals - Scan through the users of all of the internal
-/// GlobalValue's in the program. If none of them have their "address taken"
-/// (really, their address passed to something nontrivial), record this fact,
-/// and record the functions that they are used directly in.
-void GlobalsModRef::AnalyzeGlobals(Module &M) {
- std::vector<Function *> Readers, Writers;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasLocalLinkage()) {
- if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
- // Remember that we are tracking this global.
- NonAddressTakenGlobals.insert(I);
- ++NumNonAddrTakenFunctions;
- }
- Readers.clear();
- Writers.clear();
- }
-
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
- ++I)
- if (I->hasLocalLinkage()) {
- if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
- // Remember that we are tracking this global, and the mod/ref fns
- NonAddressTakenGlobals.insert(I);
-
- for (unsigned i = 0, e = Readers.size(); i != e; ++i)
- FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
-
- if (!I->isConstant()) // No need to keep track of writers to constants
- for (unsigned i = 0, e = Writers.size(); i != e; ++i)
- FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
- ++NumNonAddrTakenGlobalVars;
-
- // If this global holds a pointer type, see if it is an indirect global.
- if (I->getType()->getElementType()->isPointerTy() &&
- AnalyzeIndirectGlobalMemory(I))
- ++NumIndirectGlobalVars;
- }
- Readers.clear();
- Writers.clear();
- }
-}
-
-/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
-/// If this is used by anything complex (i.e., the address escapes), return
-/// true. Also, while we are at it, keep track of those functions that read and
-/// write to the value.
-///
-/// If OkayStoreDest is non-null, stores into this global are allowed.
-bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
- std::vector<Function *> &Readers,
- std::vector<Function *> &Writers,
- GlobalValue *OkayStoreDest) {
- if (!V->getType()->isPointerTy())
- return true;
-
- for (Use &U : V->uses()) {
- User *I = U.getUser();
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Readers.push_back(LI->getParent()->getParent());
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (V == SI->getOperand(1)) {
- Writers.push_back(SI->getParent()->getParent());
- } else if (SI->getOperand(1) != OkayStoreDest) {
- return true; // Storing the pointer
- }
- } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
- if (AnalyzeUsesOfPointer(I, Readers, Writers))
- return true;
- } else if (Operator::getOpcode(I) == Instruction::BitCast) {
- if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
- return true;
- } else if (auto CS = CallSite(I)) {
- // Make sure that this is just the function being called, not a pointer
- // being passed into the function.
- if (!CS.isCallee(&U)) {
- // Detect calls to free.
- if (isFreeCall(I, TLI))
- Writers.push_back(CS->getParent()->getParent());
- else
- return true; // Argument of an unknown call.
- }
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
- } else {
- return true;
- }
- }
-
- return false;
-}
-
-/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
-/// which holds a pointer type. See if the global always points to non-aliased
-/// heap memory: that is, all initializers of the globals are allocations, and
-/// those allocations have no use other than initialization of the global.
-/// Further, all loads out of GV must directly use the memory, not store the
-/// pointer somewhere. If this is true, we consider the memory pointed to by
-/// GV to be owned by GV and can disambiguate other pointers from it.
-bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
- // Keep track of values related to the allocation of the memory, e.g. the
- // value produced by the malloc call and any casts.
- std::vector<Value *> AllocRelatedValues;
-
- // Walk the user list of the global. If we find anything other than a direct
- // load or store, bail out.
- for (User *U : GV->users()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // The pointer loaded from the global can only be used in simple ways:
- // we allow addressing of it and loading from and storing to it. We do *not*
- // allow storing the loaded pointer somewhere else or passing it to a function.
- std::vector<Function *> ReadersWriters;
- if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
- return false; // Loaded pointer escapes.
- // TODO: Could try some IP mod/ref of the loaded pointer.
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- // Storing the global itself.
- if (SI->getOperand(0) == GV)
- return false;
-
- // If storing the null pointer, ignore it.
- if (isa<ConstantPointerNull>(SI->getOperand(0)))
- continue;
-
- // Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
- GV->getParent()->getDataLayout());
-
- if (!isAllocLikeFn(Ptr, TLI))
- return false; // Too hard to analyze.
-
- // Analyze all uses of the allocation. If any of them are used in a
- // non-simple way (e.g., stored to another global), bail out.
- std::vector<Function *> ReadersWriters;
- if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
- return false; // Stored allocation escapes.
-
- // Remember that this allocation is related to the indirect global.
- AllocRelatedValues.push_back(Ptr);
- } else {
- // Something complex, bail out.
- return false;
- }
- }
-
- // Okay, this is an indirect global. Remember all of the allocations for
- // this global in AllocsForIndirectGlobals.
- while (!AllocRelatedValues.empty()) {
- AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
- AllocRelatedValues.pop_back();
- }
- IndirectGlobals.insert(GV);
- return true;
-}
-
-/// AnalyzeCallGraph - At this point, we know the functions where globals are
-/// immediately stored to and read from. Propagate this information up the call
-/// graph to all callers and compute the mod/ref info for all memory for each
-/// function.
-void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
- // We do a bottom-up SCC traversal of the call graph. In other words, we
- // visit all callees before callers (leaf-first).
- for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
- const std::vector<CallGraphNode *> &SCC = *I;
- assert(!SCC.empty() && "SCC with no functions?");
-
- if (!SCC[0]->getFunction()) {
- // Calls externally - can't say anything useful. Remove any existing
- // function records (may have been created when scanning globals).
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- FunctionInfo.erase(SCC[i]->getFunction());
- continue;
- }
-
- FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
-
- bool KnowNothing = false;
- unsigned FunctionEffect = 0;
-
- // Collect the mod/ref properties due to called functions. We only compute
- // one mod-ref set.
- for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
- Function *F = SCC[i]->getFunction();
- if (!F) {
- KnowNothing = true;
- break;
- }
-
- if (F->isDeclaration()) {
- // Try to get mod/ref behaviour from function attributes.
- if (F->doesNotAccessMemory()) {
- // Can't do better than that!
- } else if (F->onlyReadsMemory()) {
- FunctionEffect |= Ref;
- if (!F->isIntrinsic())
- // This function might call back into the module and read a global -
- // consider every global as possibly being read by this function.
- FR.MayReadAnyGlobal = true;
- } else {
- FunctionEffect |= ModRef;
- // Can't say anything useful unless it's an intrinsic - they don't
- // read or write global variables of the kind considered here.
- KnowNothing = !F->isIntrinsic();
- }
- continue;
- }
-
- for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
- CI != E && !KnowNothing; ++CI)
- if (Function *Callee = CI->second->getFunction()) {
- if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
- // Propagate function effect up.
- FunctionEffect |= CalleeFR->FunctionEffect;
-
- // Incorporate callee's effects on globals into our info.
- for (const auto &G : CalleeFR->GlobalInfo)
- FR.GlobalInfo[G.first] |= G.second;
- FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
- } else {
- // Can't say anything about it. However, if it is inside our SCC,
- // then nothing needs to be done.
- CallGraphNode *CalleeNode = CG[Callee];
- if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
- KnowNothing = true;
- }
- } else {
- KnowNothing = true;
- }
- }
-
- // If we can't say anything useful about this SCC, remove all SCC functions
- // from the FunctionInfo map.
- if (KnowNothing) {
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- FunctionInfo.erase(SCC[i]->getFunction());
- continue;
- }
-
- // Scan the function bodies for explicit loads or stores.
- for (auto *Node : SCC) {
- if (FunctionEffect == ModRef)
- break; // The mod/ref lattice saturates here.
- for (Instruction &I : inst_range(Node->getFunction())) {
- if (FunctionEffect == ModRef)
- break; // The mod/ref lattice saturates here.
-
- // We handle calls specially because the graph-relevant aspects are
- // handled above.
- if (auto CS = CallSite(&I)) {
- if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) {
- // FIXME: It is completely unclear why this is necessary and not
- // handled by the above graph code.
- FunctionEffect |= ModRef;
- } else if (Function *Callee = CS.getCalledFunction()) {
- // The callgraph doesn't include intrinsic calls.
- if (Callee->isIntrinsic()) {
- ModRefBehavior Behaviour =
- AliasAnalysis::getModRefBehavior(Callee);
- FunctionEffect |= (Behaviour & ModRef);
- }
- }
- continue;
- }
-
- // For all non-call instructions, we use the primary predicates to decide
- // whether they read or write memory.
- if (I.mayReadFromMemory())
- FunctionEffect |= Ref;
- if (I.mayWriteToMemory())
- FunctionEffect |= Mod;
- }
- }
-
- if ((FunctionEffect & Mod) == 0)
- ++NumReadMemFunctions;
- if (FunctionEffect == 0)
- ++NumNoMemFunctions;
- FR.FunctionEffect = FunctionEffect;
-
- // Finally, now that we know the full effect on this SCC, clone the
- // information to each function in the SCC.
- for (unsigned i = 1, e = SCC.size(); i != e; ++i)
- FunctionInfo[SCC[i]->getFunction()] = FR;
- }
-}
-
-/// alias - If one of the pointers is to a global that we are tracking, and the
-/// other is some random pointer, we know there cannot be an alias, because the
-/// address of the global isn't taken.
-AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- // Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
-
- // If either of the underlying values is a global, they may be non-addr-taken
- // globals, which we can answer queries about.
- const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
- const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
- if (GV1 || GV2) {
- // If the global's address is taken, pretend we don't know it's a pointer to
- // the global.
- if (GV1 && !NonAddressTakenGlobals.count(GV1))
- GV1 = nullptr;
- if (GV2 && !NonAddressTakenGlobals.count(GV2))
- GV2 = nullptr;
-
- // If the two pointers are derived from two different non-addr-taken
- // globals, or if one is and the other isn't, we know these can't alias.
- if ((GV1 || GV2) && GV1 != GV2)
- return NoAlias;
-
- // Otherwise if they are both derived from the same addr-taken global, we
- // can't know the two accesses don't overlap.
- }
-
- // These pointers may be based on the memory owned by an indirect global. If
- // so, we may be able to handle this. First check to see if the base pointer
- // is a direct load from an indirect global.
- GV1 = GV2 = nullptr;
- if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
- if (IndirectGlobals.count(GV))
- GV1 = GV;
- if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
- if (IndirectGlobals.count(GV))
- GV2 = GV;
-
- // These pointers may also be from an allocation for the indirect global. If
- // so, also handle them.
- if (AllocsForIndirectGlobals.count(UV1))
- GV1 = AllocsForIndirectGlobals[UV1];
- if (AllocsForIndirectGlobals.count(UV2))
- GV2 = AllocsForIndirectGlobals[UV2];
-
- // Now that we know whether the two pointers are related to indirect globals,
- // use this to disambiguate the pointers. If either pointer is based on an
- // indirect global and if they are not both based on the same indirect global,
- // they cannot alias.
- if ((GV1 || GV2) && GV1 != GV2)
- return NoAlias;
-
- return AliasAnalysis::alias(LocA, LocB);
-}
-
-AliasAnalysis::ModRefResult
-GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- unsigned Known = ModRef;
-
- // If we are asking for mod/ref info of a direct call with a pointer to a
- // global we are tracking, return information if we have it.
- const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
- if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
- if (GV->hasLocalLinkage())
- if (const Function *F = CS.getCalledFunction())
- if (NonAddressTakenGlobals.count(GV))
- if (const FunctionRecord *FR = getFunctionInfo(F))
- Known = FR->getInfoForGlobal(GV);
-
- if (Known == NoModRef)
- return NoModRef; // No need to query other mod/ref analyses
- return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
-}
-
-//===----------------------------------------------------------------------===//
-// Methods to update the analysis as a result of the client transformation.
-//
-void GlobalsModRef::deleteValue(Value *V) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- if (NonAddressTakenGlobals.erase(GV)) {
- // This global might be an indirect global. If so, remove it and remove
- // any AllocRelatedValues for it.
- if (IndirectGlobals.erase(GV)) {
- // Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<const Value *, const GlobalValue *>::iterator
- I = AllocsForIndirectGlobals.begin(),
- E = AllocsForIndirectGlobals.end();
- I != E;) {
- if (I->second == GV) {
- AllocsForIndirectGlobals.erase(I++);
- } else {
- ++I;
- }
- }
- }
- }
- }
-
- // Otherwise, if this is an allocation related to an indirect global, remove
- // it.
- AllocsForIndirectGlobals.erase(V);
-
- AliasAnalysis::deleteValue(V);
-}
-
-void GlobalsModRef::addEscapingUse(Use &U) {
- // For the purposes of this analysis, it is conservatively correct to treat
- // a newly escaping value equivalently to a deleted one. We could perhaps
- // be more precise by processing the new use and attempting to update our
- // saved analysis results to accommodate it.
- deleteValue(U);
-
- AliasAnalysis::addEscapingUse(U);
-}
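
The alias query deleted here reduces to one disambiguation test: a global whose address is never taken can only be reached through itself, so two underlying objects are provably distinct whenever at least one of them is such a global and they differ. A minimal standalone sketch of that rule, using hypothetical stand-in types rather than the LLVM API (the logic itself reappears in the rewritten Analysis/GlobalsModRef.cpp):

#include <cstdio>
#include <set>

// Hypothetical stand-ins for the pass's bookkeeping; the names mirror the
// members above but this is not the LLVM API.
struct Obj { const char *Name; };

static std::set<const Obj *> NonAddressTakenGlobals;

// Mirrors the first half of GlobalsModRef::alias: GV1/GV2 are the
// underlying objects of the two pointers, or null for non-globals.
static bool definitelyNoAlias(const Obj *GV1, const Obj *GV2) {
  // If a global's address is taken, pretend we know nothing about it.
  if (GV1 && !NonAddressTakenGlobals.count(GV1))
    GV1 = nullptr;
  if (GV2 && !NonAddressTakenGlobals.count(GV2))
    GV2 = nullptr;
  // A tracked global can only alias itself: its address never escapes.
  return (GV1 || GV2) && GV1 != GV2;
}

int main() {
  Obj G{"g"}, H{"h"};
  NonAddressTakenGlobals.insert(&G);
  std::printf("%d\n", definitelyNoAlias(&G, &H)); // 1: provably distinct
  std::printf("%d\n", definitelyNoAlias(&H, &H)); // 0: cannot disambiguate
}
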
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
deleted file mode 100644
index 806bfb8..0000000
--- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- IPA.cpp -----------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the common initialization routines for the IPA library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm-c/Initialization.h"
-#include "llvm/PassRegistry.h"
-
-using namespace llvm;
-
-/// initializeIPA - Initialize all passes linked into the IPA library.
-void llvm::initializeIPA(PassRegistry &Registry) {
- initializeCallGraphWrapperPassPass(Registry);
- initializeCallGraphPrinterPass(Registry);
- initializeCallGraphViewerPass(Registry);
- initializeGlobalsModRefPass(Registry);
-}
-
-void LLVMInitializeIPA(LLVMPassRegistryRef R) {
- initializeIPA(*unwrap(R));
-}
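
With the IPA library folded into Analysis, the LLVMInitializeIPA entry point goes away. A minimal C++ sketch over the C API, assuming these passes are now registered by the Analysis library (an assumption consistent with the IPA-to-Analysis renames in the diffstat):

#include <llvm-c/Core.h>
#include <llvm-c/Initialization.h>

int main() {
  LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
  // Previously: LLVMInitializeIPA(R). The call graph and globals-mod/ref
  // passes are assumed to be covered by the Analysis initializer now.
  LLVMInitializeAnalysis(R);
  return 0;
}
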
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index 926787d..e0c5d8f 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(IVUsers, "iv-users",
"Induction Variable Users", false, true)
@@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.setPreservesAll();
}
@@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
*L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Collect ephemeral values so that AddUsersIfInteresting skips them.
EphValues.clear();
@@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- (void)AddUsersIfInteresting(I);
+ (void)AddUsersIfInteresting(&*I);
return false;
}
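
The IVUsers hunks follow the general ScalarEvolution-to-wrapper-pass migration: the analysis object now lives inside ScalarEvolutionWrapperPass and is fetched with getSE(). A minimal sketch of the same pattern for any legacy pass, assuming the headers from this revision; the pass itself is illustrative:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Pass.h"

namespace {
struct UsesSCEV : public llvm::FunctionPass {
  static char ID;
  UsesSCEV() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    // Before: AU.addRequired<ScalarEvolution>();
    AU.addRequired<llvm::ScalarEvolutionWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    // Before: SE = &getAnalysis<ScalarEvolution>();
    llvm::ScalarEvolution &SE =
        getAnalysis<llvm::ScalarEvolutionWrapperPass>().getSE();
    (void)SE; // query trip counts, SCEV expressions, etc. from here
    return false;
  }
};
char UsesSCEV::ID = 0;
}
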
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index c0d2e37..a86a703 100644
--- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// inlining has the given attribute set either at the call site or the
/// function declaration. Primarily used to inspect call site specific
/// attributes since these can be more precise than the ones on the callee
- /// itself.
+ /// itself.
bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
/// Return true if the given value is known non null within the callee if
- /// inlined through this particular callsite.
+ /// inlined through this particular callsite.
bool isKnownNonNullInCallee(Value *V);
// Custom analysis routines.
@@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);
+ bool visitCleanupReturnInst(CleanupReturnInst &RI);
+ bool visitCatchReturnInst(CatchReturnInst &RI);
bool visitUnreachableInst(UnreachableInst &I);
public:
@@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
- // bonus we want to apply, but don't go below zero.
- Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ // threshold to get the bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, CA.getThreshold() - CA.getCost());
}
return Base::visitCallSite(CS);
@@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
return false;
}
+bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a cleanupret instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a catchret instruction.
+ return false;
+}
+
bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
 // FIXME: It might be reasonable to discount the cost of instructions leading
// to unreachable as they have the lowest possible impact on both runtime and
@@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
continue;
// Skip ephemeral values.
- if (EphValues.count(I))
+ if (EphValues.count(&*I))
continue;
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
- // If the instruction is floating point, and the target says this operation is
- // expensive or the function has the "use-soft-float" attribute, this may
- // eventually become a library call. Treat the cost as such.
+ // If the instruction is floating point, and the target says this operation
+ // is expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
if (I->getType()->isFloatingPointTy()) {
bool hasSoftFloatAttr = false;
- // If the function has the "use-soft-float" attribute, mark it as expensive.
+ // If the function has the "use-soft-float" attribute, mark it as
+ // expensive.
if (F.hasFnAttribute("use-soft-float")) {
Attribute Attr = F.getFnAttribute("use-soft-float");
StringRef Val = Attr.getValueAsString();
@@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base
// cost should count against inlining.
- if (Base::visit(I))
+ if (Base::visit(&*I))
++NumInstructionsSimplified;
else
Cost += InlineConstants::InstrCost;
@@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
FAI != FAE; ++FAI, ++CAI) {
assert(CAI != CS.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
- SimplifiedValues[FAI] = C;
+ SimplifiedValues[&*FAI] = C;
Value *PtrArg = *CAI;
if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
- ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+ ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
// We can SROA any pointer arguments derived from alloca instructions.
if (isa<AllocaInst>(PtrArg)) {
- SROAArgValues[FAI] = PtrArg;
+ SROAArgValues[&*FAI] = PtrArg;
SROAArgCosts[PtrArg] = 0;
}
}
@@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
- return Cost < Threshold;
+ return Cost <= std::max(0, Threshold);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() {
}
#endif
-INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
- true, true)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
- true, true)
-
-char InlineCostAnalysis::ID = 0;
-
-InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
-
-InlineCostAnalysis::~InlineCostAnalysis() {}
-
-void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
-}
-
-bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- ACT = &getAnalysis<AssumptionCacheTracker>();
- return false;
-}
-
-InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
- return getInlineCost(CS, CS.getCalledFunction(), Threshold);
-}
-
/// \brief Test that two functions either both have or both lack the given
/// attribute.
template<typename AttrKind>
@@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee,
TargetTransformInfo &TTI) {
- return TTI.hasCompatibleFunctionAttributes(Caller, Callee) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+ return TTI.areInlineCompatible(Caller, Callee) &&
+ AttributeFuncs::areInlineCompatible(*Caller, *Callee);
+}
+
+InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
}
-InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
- int Threshold) {
+InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
// Cannot inline indirect calls.
if (!Callee)
return llvm::InlineCost::getNever();
@@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee,
- TTIWP->getTTI(*Callee)))
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
return llvm::InlineCost::getNever();
// Don't inline this call if the caller has the optnone attribute.
@@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS);
+ CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
-bool InlineCostAnalysis::isInlineViable(Function &F) {
+bool llvm::isInlineViable(Function &F) {
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches or
@@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
return false;
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- CallSite CS(II);
+ for (auto &II : *BI) {
+ CallSite CS(&II);
if (!CS)
continue;
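
With the InlineCostAnalysis pass removed, inline cost is computed through the free functions defined above; callers now supply the callee's TargetTransformInfo and the AssumptionCacheTracker themselves. A minimal sketch of a caller, where shouldInline is a hypothetical helper:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"

static bool shouldInline(llvm::CallSite CS, int Threshold,
                         llvm::TargetTransformInfo &CalleeTTI,
                         llvm::AssumptionCacheTracker *ACT) {
  llvm::InlineCost IC = llvm::getInlineCost(CS, Threshold, CalleeTTI, ACT);
  if (IC.isAlways())
    return true;  // e.g. alwaysinline callees
  if (IC.isNever())
    return false; // e.g. conflicting attributes, indirect branches
  return static_cast<bool>(IC); // cost is within the threshold
}
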
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index a7f8f5c..b89ff26 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return DT->dominates(I, P);
}
- // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // Otherwise, if the instruction is in the entry block and is not an invoke,
// then it obviously dominates all phi nodes.
if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
!isa<InvokeInst>(I))
@@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL,
// Is the set of underlying objects all noalias calls?
auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(),
- [](Value *V){ return isNoAliasCall(V); });
+ return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
@@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// X >=u 1 -> X
if (match(RHS, m_One()))
return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ /// For signed comparison, the values for an i1 are 0 and -1
+ /// respectively. This maps into a truth table of:
+ /// LHS | RHS | LHS >=s RHS | LHS implies RHS
+ /// 0 | 0 | 1 (0 >= 0) | 1
+ /// 0 | 1 | 1 (0 >= -1) | 1
+ /// 1 | 0 | 0 (-1 >= 0) | 0
+ /// 1 | 1 | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
// X <s 0 -> X
@@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (match(RHS, m_One()))
return LHS;
break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
+ break;
}
}
@@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
} else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
// 'and x, CI2' produces [0, CI2].
Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
+ // 'add nuw x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
}
- if (Lower != Upper) {
- ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+
+ ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
+ : ConstantRange(Width, true);
+
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+
+ if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(RHS->getContext());
if (RHS_CR.inverse().contains(LHS_CR))
@@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
// Compare of cast, for example (zext X) != 0 -> X != 0
if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
Instruction *LI = cast<CastInst>(LHS);
@@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
// Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
@@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
- Instruction *CxtI) {
+ const Instruction *CxtI) {
return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
break;
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!Result && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
+ if ((KnownZero | KnownOne).isAllOnesValue())
+ Result = ConstantInt::get(I->getContext(), KnownOne);
+ }
+
/// If called on unreachable code, the above logic may report that the
/// instruction simplified to itself. Make life easier for users by
/// detecting that case here, returning a safe value instead.
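
The range-metadata comparison rule added above can be exercised directly with ConstantRange: the icmp folds to true when the satisfying region for the predicate against the RHS range contains the whole LHS range, and to false when the inverse predicate's region does. A minimal sketch with illustrative 32-bit ranges:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  ConstantRange LHS(APInt(32, 0), APInt(32, 10));  // models !range [0, 10)
  ConstantRange RHS(APInt(32, 20), APInt(32, 30)); // models !range [20, 30)

  // Region of values x with "x ult y" for every y in RHS: here [0, 20).
  ConstantRange Satisfied =
      ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_ULT, RHS);
  if (Satisfied.contains(LHS))
    outs() << "icmp ult folds to true\n"; // [0,10) lies inside [0,20)

  ConstantRange Inverse = ConstantRange::makeSatisfyingICmpRegion(
      CmpInst::getInversePredicate(CmpInst::ICMP_ULT), RHS);
  if (Inverse.contains(LHS))
    outs() << "icmp ult folds to false\n"; // not reached for these ranges
}
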
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index c8d0410..0f0f31e 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {
assert(CalleeC.isDescendantOf(*this) &&
"Callee must be a descendant of the Caller.");
- // The only change required is to add this SCC to the parent set of the callee.
+ // The only change required is to add this SCC to the parent set of the
+ // callee.
CalleeC.ParentSCCs.insert(this);
}
@@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS(
}
SmallVector<LazyCallGraph::SCC *, 1>
-LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
- Node &CalleeN) {
+LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) {
// First remove it from the node.
CallerN.removeEdgeInternal(CalleeN.getFunction());
@@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
// the leaf SCC list.
if (!IsLeafSCC && !ResultSCCs.empty())
G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this),
- G->LeafSCCs.end());
+ G->LeafSCCs.end());
// Return the new list of SCCs.
return ResultSCCs;
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index a6ae7f2..0d1d34e 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
@@ -64,10 +65,10 @@ class LVILatticeVal {
enum LatticeValueTy {
/// This Value has no known value yet.
undefined,
-
+
/// This Value has a specific constant value.
constant,
-
+
/// This Value is known to not have the specified value.
notconstant,
@@ -77,13 +78,13 @@ class LVILatticeVal {
/// This value is not known to be constant, and we know that it has a value.
overdefined
};
-
+
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'notconstant' value.
LatticeValueTy Tag;
Constant *Val;
ConstantRange Range;
-
+
public:
LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
@@ -104,29 +105,34 @@ public:
Res.markConstantRange(CR);
return Res;
}
+ static LVILatticeVal getOverdefined() {
+ LVILatticeVal Res;
+ Res.markOverdefined();
+ return Res;
+ }
bool isUndefined() const { return Tag == undefined; }
bool isConstant() const { return Tag == constant; }
bool isNotConstant() const { return Tag == notconstant; }
bool isConstantRange() const { return Tag == constantrange; }
bool isOverdefined() const { return Tag == overdefined; }
-
+
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
return Val;
}
-
+
Constant *getNotConstant() const {
assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
return Val;
}
-
+
ConstantRange getConstantRange() const {
assert(isConstantRange() &&
"Cannot get the constant-range of a non-constant-range!");
return Range;
}
-
+
/// Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
@@ -150,7 +156,7 @@ public:
Val = V;
return true;
}
-
+
/// Return true if this is a change in status.
bool markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
@@ -168,27 +174,27 @@ public:
Val = V;
return true;
}
-
+
/// Return true if this is a change in status.
bool markConstantRange(const ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
return markOverdefined();
-
+
bool changed = Range != NewR;
Range = NewR;
return changed;
}
-
+
assert(isUndefined());
if (NewR.isEmptySet())
return markOverdefined();
-
+
Tag = constantrange;
Range = NewR;
return true;
}
-
+
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
@@ -267,7 +273,7 @@ public:
return markConstantRange(NewR);
}
};
-
+
} // end anonymous namespace.
namespace llvm {
@@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
namespace {
/// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
- struct LVIValueHandle : public CallbackVH {
+ struct LVIValueHandle final : public CallbackVH {
LazyValueInfoCache *Parent;
-
+
LVIValueHandle(Value *V, LazyValueInfoCache *P)
: CallbackVH(V), Parent(P) { }
@@ -308,24 +314,27 @@ namespace {
};
}
-namespace {
+namespace {
/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
/// This is all of the cached block information for exactly one Value*.
/// The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
- typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+ /// Over-defined lattice values are recorded in OverDefinedCache to reduce
+ /// memory overhead.
+ typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
+ ValueCacheEntryTy;
/// This is all of the cached information for all values,
/// mapped from Value* to key information.
std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
-
+
/// This tracks, on a per-block basis, the set of values that are
- /// over-defined at the end of that block. This is required
- /// for cache updating.
- typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
- DenseSet<OverDefinedPairTy> OverDefinedCache;
+ /// over-defined at the end of that block.
+ typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>>
+ OverDefinedCacheTy;
+ OverDefinedCacheTy OverDefinedCache;
/// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
@@ -357,9 +366,13 @@ namespace {
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
SeenBlocks.insert(BB);
- lookup(Val)[BB] = Result;
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
if (Result.isOverdefined())
- OverDefinedCache.insert(std::make_pair(BB, Val));
+ OverDefinedCache[BB].insert(Val);
+ else
+ lookup(Val)[BB] = Result;
}
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
@@ -382,11 +395,39 @@ namespace {
Instruction *BBI);
void solve();
-
+
ValueCacheEntryTy &lookup(Value *V) {
return ValueCache[LVIValueHandle(V, this)];
}
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return true;
+
+ LVIValueHandle ValHandle(V, this);
+ auto I = ValueCache.find(ValHandle);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ return lookup(V)[BB];
+ }
+
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
@@ -402,15 +443,15 @@ namespace {
/// value for the specified Value* that is true on the specified edge.
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
-
+
/// This is the update interface to inform the cache that an edge from
/// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
-
+
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);
-
+
/// clear - Empty the cache.
void clear() {
SeenBlocks.clear();
@@ -425,15 +466,17 @@ namespace {
} // end anonymous namespace
void LVIValueHandle::deleted() {
- typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
-
- SmallVector<OverDefinedPairTy, 4> ToErase;
- for (const OverDefinedPairTy &P : Parent->OverDefinedCache)
- if (P.second == getValPtr())
- ToErase.push_back(P);
- for (const OverDefinedPairTy &P : ToErase)
- Parent->OverDefinedCache.erase(P);
-
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : Parent->OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ if (ValueSet.count(getValPtr()))
+ ValueSet.erase(getValPtr());
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ Parent->OverDefinedCache.erase(BB);
+
// This erasure deallocates *this, so it MUST happen after we're done
// using any and all members of *this.
Parent->ValueCache.erase(*this);
@@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
return;
SeenBlocks.erase(I);
- SmallVector<OverDefinedPairTy, 4> ToErase;
- for (const OverDefinedPairTy& P : OverDefinedCache)
- if (P.first == BB)
- ToErase.push_back(P);
- for (const OverDefinedPairTy &P : ToErase)
- OverDefinedCache.erase(P);
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
- for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
- I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
I->second.erase(BB);
}
@@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() {
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
- assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+ assert(hasCachedValueInfo(e.second, e.first) &&
+ "Result should be in cache!");
BlockValueStack.pop();
BlockValueSet.erase(e);
@@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- LVIValueHandle ValHandle(Val, this);
- std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I =
- ValueCache.find(ValHandle);
- if (I == ValueCache.end()) return false;
- return I->second.count(BB);
+ return hasCachedValueInfo(Val, BB);
}
LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
@@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
return LVILatticeVal::get(VC);
SeenBlocks.insert(BB);
- return lookup(Val)[BB];
+ return getCachedValueInfo(Val, BB);
+}
+
+static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+ switch (BBI->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (isa<IntegerType>(BBI->getType())) {
+ ConstantRange Result = getConstantRangeFromMetadata(*Ranges);
+ return LVILatticeVal::getRange(Result);
+ }
+ break;
+ };
+ // Nothing known - Note that we do not want overdefined here. We may know
+ // something else about the value and not having range metadata shouldn't
+ // cause us to throw away those facts.
+ return LVILatticeVal();
}
bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- if (lookup(Val).count(BB)) {
+ if (hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << lookup(Val)[BB] << '\n');
+ << "' val=" << getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
LVILatticeVal Res;
-
+
Instruction *BBI = dyn_cast<Instruction>(Val);
if (!BBI || BBI->getParent() != BB) {
if (!solveBlockValueNonLocal(Res, Val, BB))
@@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
- Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ // If this value is a known nonnull pointer, record its range and bail out.
+ PointerType *PT = dyn_cast<PointerType>(BBI->getType());
+ if (PT && isKnownNonNull(BBI)) {
+ Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
insertResult(Val, BB, Res);
return true;
}
+ // If this is an instruction which supports range metadata, return the
+ // implied range. TODO: This should be an intersection, not a union.
+ Res.mergeIn(getFromRangeMetadata(BBI), DL);
+
// We can only analyze the definitions of certain classes of instructions
// (integral binops and casts at the moment), so bail if this isn't one.
LVILatticeVal Result;
@@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
}
-
+
BBLV = Result;
return true;
}
@@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
BBLV = Result;
return true;
}
-
+
bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
PHINode *PN, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred.\n");
-
+
BBLV = Result;
return true;
}
@@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
BBLV.markOverdefined();
return true;
}
-
+
ConstantRange LHSRange = LHSVal.getConstantRange();
ConstantRange RHSRange(1);
IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
@@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
case Instruction::Or:
Result.markConstantRange(LHSRange.binaryOr(RHSRange));
break;
-
+
// Unhandled instructions are overdefined.
default:
DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
Result.markOverdefined();
break;
}
-
+
BBLV = Result;
return true;
}
@@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
/// Val is not constrained on the edge.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result) {
- // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
// If this is a conditional branch and only one successor goes to BBTo, then
@@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
bool isTrueDest = BI->getSuccessor(0) == BBTo;
assert(BI->getSuccessor(!isTrueDest) == BBTo &&
"BBTo isn't a successor of BBFrom");
-
+
// If V is the condition of the branch itself, then we know exactly what
// it is.
if (BI->getCondition() == Val) {
@@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
Type::getInt1Ty(Val->getContext()), isTrueDest));
return true;
}
-
+
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
@@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
-
+
assert(BlockValueStack.empty() && BlockValueSet.empty());
pushBlockValue(std::make_pair(BB, V));
@@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
<< CxtI->getName() << "'\n");
LVILatticeVal Result;
+ if (auto *I = dyn_cast<Instruction>(V))
+ Result = getFromRangeMetadata(I);
mergeAssumeBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
@@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
-
+
LVILatticeVal Result;
if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
solve();
@@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- // When an edge in the graph has been threaded, values that we could not
- // determine a value for before (i.e. were marked overdefined) may be possible
- // to solve now. We do NOT try to proactively update these values. Instead,
- // we clear their entries from the cache, and allow lazy updating to recompute
- // them when needed.
-
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be
+ // possible to solve now. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
// The updating process is fairly simple: we need to drop cached info
// for all values that were marked overdefined in OldSucc, and for those same
// values in any successor of OldSucc (except NewSucc) in which they were
// also marked overdefined.
std::vector<BasicBlock*> worklist;
worklist.push_back(OldSucc);
-
- DenseSet<Value*> ClearSet;
- for (OverDefinedPairTy &P : OverDefinedCache)
- if (P.first == OldSucc)
- ClearSet.insert(P.second);
-
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
// Use a worklist to perform a depth-first search of OldSucc's successors.
// NOTE: We do not need a visited list since any blocks we have already
// visited will have had their overdefined markers cleared already, and we
@@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
while (!worklist.empty()) {
BasicBlock *ToUpdate = worklist.back();
worklist.pop_back();
-
+
// Skip blocks only accessible through NewSucc.
if (ToUpdate == NewSucc) continue;
-
+
bool changed = false;
- for (Value *V : ClearSet) {
+ for (Value *V : ValsToClear) {
// If a value was marked overdefined in OldSucc, and is here too...
- DenseSet<OverDefinedPairTy>::iterator OI =
- OverDefinedCache.find(std::make_pair(ToUpdate, V));
- if (OI == OverDefinedCache.end()) continue;
-
- // Remove it from the caches.
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)];
- ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
-
- assert(CI != Entry.end() && "Couldn't find entry to update?");
- Entry.erase(CI);
- OverDefinedCache.erase(OI);
-
- // If we removed anything, then we potentially need to update
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+ if (!ValueSet.count(V))
+ continue;
+
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
// blocks successors too.
changed = true;
}
if (!changed) continue;
-
+
worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
}
}
@@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
}
/// Determine whether the specified value is known to be a
-/// constant on the specified edge. Return null if not.
+/// constant on the specified edge. Return null if not.
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
@@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
return LazyValueInfo::Unknown;
}
-
+
if (Result.isConstantRange()) {
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return LazyValueInfo::Unknown;
-
+
ConstantRange CR = Result.getConstantRange();
if (Pred == ICmpInst::ICMP_EQ) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::False;
-
+
if (CR.isSingleElement() && CR.contains(CI->getValue()))
return LazyValueInfo::True;
} else if (Pred == ICmpInst::ICMP_NE) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::True;
-
+
if (CR.isSingleElement() && CR.contains(CI->getValue()))
return LazyValueInfo::False;
}
-
+
// Handle more complex predicates.
ConstantRange TrueValues =
ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
@@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
return LazyValueInfo::False;
return LazyValueInfo::Unknown;
}
-
+
if (Result.isNotConstant()) {
// If this is an equality comparison, we can try to fold it knowing that
// "V != C1".
@@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
}
return LazyValueInfo::Unknown;
}
-
+
return LazyValueInfo::Unknown;
}
@@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
if (Ret != Unknown)
return Ret;
- // TODO: Move this logic inside getValueAt so that it can be cached rather
- // than re-queried on each call. This would also allow us to merge the
- // underlying lattice values to get more information
+ // Note: The following bit of code is somewhat distinct from the rest of LVI;
+ // LVI as a whole tries to compute a lattice value which is conservatively
+ // correct at a given location. In this case, we have a predicate which we
+ // weren't able to prove about the merged result, and we're pushing that
+ // predicate back along each incoming edge to see if we can prove it
+ // separately for each input. As a motivating example, consider:
+ // bb1:
+ // %v1 = ... ; constantrange<1, 5>
+ // br label %merge
+ // bb2:
+ // %v2 = ... ; constantrange<10, 20>
+ // br label %merge
+ // merge:
+ // %phi = phi [%v1, %v2] ; constantrange<1,20>
+ // %pred = icmp eq i32 %phi, 8
+ // We can't tell from the lattice value for '%phi' that '%pred' is false
+ // along each path, but by checking the predicate over each input separately,
+ // we can.
+ // We limit the search to one step backwards from the current BB and value.
+ // We could consider extending this to search further backwards through the
+ // CFG and/or value graph, but there are non-obvious compile time vs quality
+ // tradeoffs.
if (CxtI) {
- // For a comparison where the V is outside this block, it's possible
- // that we've branched on it before. Look to see if the value is known
- // on all incoming edges.
BasicBlock *BB = CxtI->getParent();
+
+ // Function entry or an unreachable block. Bail out to avoid confusing
+ // the analysis below.
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (PI != PE &&
- (!isa<Instruction>(V) ||
- cast<Instruction>(V)->getParent() != BB)) {
+ if (PI == PE)
+ return Unknown;
+
+ // If V is a PHI node in the same block as the context, we need to ask
+ // questions about the predicate as applied to the incoming value along
+ // each edge. This is useful for eliminating cases where the predicate is
+ // known along all incoming edges.
+ if (auto *PHI = dyn_cast<PHINode>(V))
+ if (PHI->getParent() == BB) {
+ Tristate Baseline = Unknown;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
+ Value *Incoming = PHI->getIncomingValue(i);
+ BasicBlock *PredBB = PHI->getIncomingBlock(i);
+ // Note that PredBB may be BB itself.
+ Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB,
+ CxtI);
+
+ // Keep going as long as we've seen a consistent known result for
+ // all inputs.
+ Baseline = (i == 0) ? Result /* First iteration */
+ : (Baseline == Result ? Baseline : Unknown); /* All others */
+ if (Baseline == Unknown)
+ break;
+ }
+ if (Baseline != Unknown)
+ return Baseline;
+ }
+
+ // For a comparison where V is defined outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ if (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() != BB) {
// For predecessor edge, determine if the comparison is true or false
- // on that edge. If they're all true or all false, we can conclude
+ // on that edge. If they're all true or all false, we can conclude
// the value of the comparison in this block.
Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
if (Baseline != Unknown) {
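
The PHI handling added above amounts to a small merge loop: prove the predicate along every incoming edge and keep the answer only if all edges agree. A minimal standalone sketch, with Tristate standing in for LazyValueInfo::Tristate and the per-edge results supplied directly:

#include <cstdio>
#include <vector>

enum Tristate { False = 0, True = 1, Unknown = -1 };

// EdgeResults stands in for getPredicateOnEdge() applied to each
// incoming (value, block) pair of the PHI.
static Tristate mergeOverIncomingEdges(const std::vector<Tristate> &EdgeResults) {
  Tristate Baseline = Unknown;
  for (size_t i = 0; i < EdgeResults.size(); ++i) {
    Tristate Result = EdgeResults[i];
    Baseline = (i == 0) ? Result
                        : (Baseline == Result ? Baseline : Unknown);
    if (Baseline == Unknown)
      break; // one disagreeing edge spoils the merge
  }
  return Baseline;
}

int main() {
  // %v1 in [1,5), %v2 in [10,20): "icmp eq i32 %phi, 8" is false on
  // both incoming edges, so the merged answer is known.
  std::printf("%d\n", mergeOverIncomingEdges({False, False})); // 0
  std::printf("%d\n", mergeOverIncomingEdges({False, True}));  // -1
}
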
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
deleted file mode 100644
index 991a0e3..0000000
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LibCallAliasAnalysis class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LibCallAliasAnalysis.h"
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-// Register this pass...
-char LibCallAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
- "LibCall Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
- return new LibCallAliasAnalysis(LCI);
-}
-
-LibCallAliasAnalysis::~LibCallAliasAnalysis() {
- delete LCI;
-}
-
-void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
-}
-
-bool LibCallAliasAnalysis::runOnFunction(Function &F) {
- // set up super class
- InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
- return false;
-}
-
-/// AnalyzeLibCallDetails - Given a call to a function with the specified
-/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
-/// vs the specified pointer/size.
-AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- // If we have a function, check to see what kind of mod/ref effects it
- // has. Start by including any info globally known about the function.
- AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
- if (MRInfo == NoModRef) return MRInfo;
-
- // If that didn't tell us that the function is 'readnone', check to see
- // if we have detailed info and if 'P' is any of the locations we know
- // about.
- const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
- if (Details == nullptr)
- return MRInfo;
-
- // If the details array is of the 'DoesNot' kind, we only know something if
- // the pointer is a match for one of the locations in 'Details'. If we find a
- // match, we can prove some interactions cannot happen.
- //
- if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
- // Find out if the pointer refers to a known location.
- for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &LocInfo =
- LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
- if (Res != LibCallLocationInfo::Yes) continue;
-
- // If we find a match against a location that we 'do not' interact with,
- // fold this info into MRInfo.
- return ModRefResult(MRInfo & ~Details[i].MRInfo);
- }
- return MRInfo;
- }
-
- // If the details are of the 'DoesOnly' sort, we know something if the pointer
- // is a match for one of the locations in 'Details'. Also, if we can prove
- // that the pointer is *not* one of the locations in 'Details', we know that
- // the call is NoModRef.
- assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
-
- // Find out if the pointer refers to a known location.
- bool NoneMatch = true;
- for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &LocInfo =
- LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
- if (Res == LibCallLocationInfo::No) continue;
-
- // If we don't know if this pointer points to the location, then we have to
- // assume it might alias in some case.
- if (Res == LibCallLocationInfo::Unknown) {
- NoneMatch = false;
- continue;
- }
-
- // If we know that this pointer definitely is pointing into the location,
- // merge in this information.
- return ModRefResult(MRInfo & Details[i].MRInfo);
- }
-
- // If we found that the pointer is guaranteed to not match any of the
- // locations in our 'DoesOnly' rule, then we know that the pointer must point
- // to some other location. Since the libcall doesn't mod/ref any other
- // locations, return NoModRef.
- if (NoneMatch)
- return NoModRef;
-
- // Otherwise, return any other info gained so far.
- return MRInfo;
-}
-
-// getModRefInfo - Check to see if the specified callsite can clobber the
-// specified memory object.
-//
-AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- ModRefResult MRInfo = ModRef;
-
- // If this is a direct call to a function that LCI knows about, get the
- // information about the runtime function.
- if (LCI) {
- if (const Function *F = CS.getCalledFunction()) {
- if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
- MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
- if (MRInfo == NoModRef) return NoModRef;
- }
- }
- }
-
- // The AliasAnalysis base class has some smarts; let's use them.
- return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
-}
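
At its core, the deleted AnalyzeLibCallDetails is bitmask narrowing: start from the call's universal mod/ref behavior, clear bits on a definite 'DoesNot' match, and intersect (or collapse to no effect when nothing can match) under 'DoesOnly'. A standalone sketch of the two narrowing rules, with hypothetical stand-in types rather than the deleted LLVM ones:

#include <cstdint>
#include <vector>

enum LocResult { Yes, No, Maybe };
enum : uint8_t { NoModRef = 0, Ref = 1, Mod = 2 };

struct LocationRule {
  LocResult Match; // Does the queried pointer match this location?
  uint8_t MRInfo;  // Effects associated with this location.
};

// 'DoesNot' rules: a definite match proves some effects cannot happen,
// so clear those bits from the running mod/ref info.
uint8_t applyDoesNot(uint8_t MRInfo, const std::vector<LocationRule> &Rules) {
  for (const auto &R : Rules)
    if (R.Match == Yes)
      return MRInfo & ~R.MRInfo;
  return MRInfo;
}

// 'DoesOnly' rules: a definite match intersects the running info; if the
// pointer provably matches none of the listed locations, the call cannot
// touch it at all.
uint8_t applyDoesOnly(uint8_t MRInfo, const std::vector<LocationRule> &Rules) {
  bool NoneMatch = true;
  for (const auto &R : Rules) {
    if (R.Match == No)
      continue;
    if (R.Match == Maybe) {
      NoneMatch = false; // Might alias; can't conclude anything here.
      continue;
    }
    return MRInfo & R.MRInfo; // Definite match: merge in this information.
  }
  return NoneMatch ? NoModRef : MRInfo;
}
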
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
deleted file mode 100644
index 003c81e..0000000
--- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements interfaces that can be used to describe language
-// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
-// optimizers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Function.h"
-using namespace llvm;
-
-/// This impl pointer in ~LibCallInfo is actually a StringMap. This
-/// helper does the cast.
-static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
- return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
-}
-
-LibCallInfo::~LibCallInfo() {
- delete getMap(Impl);
-}
-
-const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
- // Get location info on the first call.
- if (NumLocations == 0)
- NumLocations = getLocationInfo(Locations);
-
- assert(LocID < NumLocations && "Invalid location ID!");
- return Locations[LocID];
-}
-
-
-/// Return the LibCallFunctionInfo object corresponding to
-/// the specified function if we have it. If not, return null.
-const LibCallFunctionInfo *
-LibCallInfo::getFunctionInfo(const Function *F) const {
- StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
-
- /// If this is the first time we are querying for this info, lazily construct
- /// the StringMap to index it.
- if (!Map) {
- Impl = Map = new StringMap<const LibCallFunctionInfo*>();
-
- const LibCallFunctionInfo *Array = getFunctionInfoArray();
- if (!Array) return nullptr;
-
- // We now have the array of entries. Populate the StringMap.
- for (unsigned i = 0; Array[i].Name; ++i)
- (*Map)[Array[i].Name] = Array+i;
- }
-
- // Look up this function in the string map.
- return Map->lookup(F->getName());
-}
-
-/// See if the given exception handling personality function is one that we
-/// understand. If so, return a description of it; otherwise return Unknown.
-EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
- const Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
- if (!F)
- return EHPersonality::Unknown;
- return StringSwitch<EHPersonality>(F->getName())
- .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
- .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
- .Case("__gcc_personality_v0", EHPersonality::GNU_C)
- .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
- .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
- .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
- .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
- .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
- .Default(EHPersonality::Unknown);
-}
-
-bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
- EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
- // We can't simplify any invokes to nounwind functions if the personality
- // function wants to catch asynchronous exceptions. The nounwind attribute
- // implies that the function does not throw synchronous exceptions.
- return !isAsynchronousEHPersonality(Personality);
-}
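
The deleted LibCallInfo::getFunctionInfo follows a common lazy-index pattern: hold a null map, build it from a sentinel-terminated array on the first query, and serve all later lookups from the map. A sketch of the same pattern using standard containers instead of LLVM's StringMap (class and member names are illustrative):

#include <string>
#include <unordered_map>

struct FunctionInfo {
  const char *Name; // nullptr terminates the array.
};

class LibCallIndex {
  mutable std::unordered_map<std::string, const FunctionInfo *> *Map = nullptr;
  const FunctionInfo *Array;

public:
  explicit LibCallIndex(const FunctionInfo *A) : Array(A) {}
  ~LibCallIndex() { delete Map; }

  const FunctionInfo *lookup(const std::string &Name) const {
    // Lazily build the index on first use.
    if (!Map) {
      Map = new std::unordered_map<std::string, const FunctionInfo *>();
      for (unsigned i = 0; Array && Array[i].Name; ++i)
        (*Map)[Array[i].Name] = &Array[i];
    }
    auto It = Map->find(Name);
    return It == Map->end() ? nullptr : It->second;
  }
};
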
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 0b9308a..2dfb09c 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -98,12 +99,13 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
- Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
- Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const;
public:
Module *Mod;
+ const DataLayout *DL;
AliasAnalysis *AA;
AssumptionCache *AC;
DominatorTree *DT;
@@ -121,7 +123,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
@@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
@@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
//
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
- AA = &getAnalysis<AliasAnalysis>();
+ DL = &F.getParent()->getDataLayout();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
- const DataLayout &DL = CS->getModule()->getDataLayout();
visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
MemRef::Callee);
- if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
+ if (Function *F = dyn_cast<Function>(findValue(Callee,
/*OffsetOk=*/false))) {
Assert(CS.getCallingConv() == F->getCallingConv(),
"Undefined behavior: Caller and callee calling convention differ",
@@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) {
for (; AI != AE; ++AI) {
Value *Actual = *AI;
if (PI != PE) {
- Argument *Formal = PI++;
+ Argument *Formal = &*PI++;
Assert(Formal->getType() == Actual->getType(),
"Undefined behavior: Call argument type mismatches "
"callee parameter type",
@@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
- visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
- DL.getABITypeAlignment(Ty), Ty,
+ visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
+ DL->getABITypeAlignment(Ty), Ty,
MemRef::Read | MemRef::Write);
}
}
@@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj),
"Undefined behavior: Call with \"tail\" keyword references "
"alloca",
@@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
uint64_t Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
@@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) {
visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
nullptr, MemRef::Read | MemRef::Write);
break;
-
- case Intrinsic::eh_begincatch:
- visitEHBeginCatch(II);
- break;
- case Intrinsic::eh_endcatch:
- visitEHEndCatch(II);
- break;
}
}
@@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) {
"Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
- Value *Obj =
- findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I,
if (Size == 0)
return;
- Value *UnderlyingObject =
- findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
Assert(!isa<ConstantPointerNull>(UnderlyingObject),
"Undefined behavior: Null pointer dereference", &I);
Assert(!isa<UndefValue>(UnderlyingObject),
@@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I,
// Check for buffer overflows and misalignment.
// Only handles memory references that read/write something simple like an
// alloca instruction or a global variable.
- auto &DL = I.getModule()->getDataLayout();
int64_t Offset = 0;
- if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
// something we can handle and if so extract the size of this base object
// along with its alignment.
@@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I,
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
Type *ATy = AI->getAllocatedType();
if (!AI->isArrayAllocation() && ATy->isSized())
- BaseSize = DL.getTypeAllocSize(ATy);
+ BaseSize = DL->getTypeAllocSize(ATy);
BaseAlign = AI->getAlignment();
if (BaseAlign == 0 && ATy->isSized())
- BaseAlign = DL.getABITypeAlignment(ATy);
+ BaseAlign = DL->getABITypeAlignment(ATy);
} else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// If the global may be defined differently in another compilation unit
// then don't warn about funky memory accesses.
if (GV->hasDefinitiveInitializer()) {
Type *GTy = GV->getType()->getElementType();
if (GTy->isSized())
- BaseSize = DL.getTypeAllocSize(GTy);
+ BaseSize = DL->getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
if (BaseAlign == 0 && GTy->isSized())
- BaseAlign = DL.getABITypeAlignment(GTy);
+ BaseAlign = DL->getABITypeAlignment(GTy);
}
}
@@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I,
// Accesses that say that the memory is more aligned than it is are not
// defined.
if (Align == 0 && Ty && Ty->isSized())
- Align = DL.getABITypeAlignment(Ty);
+ Align = DL->getABITypeAlignment(Ty);
Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
"Undefined behavior: Memory reference address is misaligned", &I);
}
@@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I,
void Lint::visitLoadInst(LoadInst &I) {
visitMemoryReference(I, I.getPointerOperand(),
- AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ DL->getTypeStoreSize(I.getType()), I.getAlignment(),
I.getType(), MemRef::Read);
}
void Lint::visitStoreInst(StoreInst &I) {
visitMemoryReference(I, I.getPointerOperand(),
- AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ DL->getTypeStoreSize(I.getOperand(0)->getType()),
I.getAlignment(),
I.getOperand(0)->getType(), MemRef::Write);
}
@@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) {
}
void Lint::visitLShr(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getOperand(1), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
- I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
- I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
-static bool
-allPredsCameFromLandingPad(BasicBlock *BB,
- SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- if (BB->isLandingPad())
- return true;
- // If we find a block with no predecessors, the search failed.
- if (pred_empty(BB))
- return false;
- for (BasicBlock *Pred : predecessors(BB)) {
- if (VisitedBlocks.count(Pred))
- continue;
- if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
- return false;
- }
- return true;
-}
-
-static bool
-allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
- IntrinsicInst **SecondBeginCatch,
- SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
- IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
- return true;
- // If we find another begincatch while looking for an endcatch,
- // that's also an error.
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
- *SecondBeginCatch = IC;
- return false;
- }
- }
-
- // If we reach a block with no successors while searching, the
- // search has failed.
- if (succ_empty(BB))
- return false;
- // Otherwise, search all of the successors.
- for (BasicBlock *Succ : successors(BB)) {
- if (VisitedBlocks.count(Succ))
- continue;
- if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
- VisitedBlocks))
- return false;
- }
- return true;
-}
-
-void Lint::visitEHBeginCatch(IntrinsicInst *II) {
- // The checks in this function make a potentially dubious assumption about
- // the CFG, namely that any block involved in a catch is only used for the
- // catch. This will very likely be true of IR generated by a front end,
- // but it may cease to be true, for example, if the IR is run through a
- // pass which combines similar blocks.
- //
- // In general, if we encounter a block the isn't dominated by the catch
- // block while we are searching the catch block's successors for a call
- // to end catch intrinsic, then it is possible that it will be legal for
- // a path through this block to never reach a call to llvm.eh.endcatch.
- // An analogous statement could be made about our search for a landing
- // pad among the catch block's predecessors.
- //
- // What is actually required is that no path is possible at runtime that
- // reaches a call to llvm.eh.begincatch without having previously visited
- // a landingpad instruction and that no path is possible at runtime that
- // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
- // (mentally adjusting for the fact that in reality these calls will be
- // removed before code generation).
- //
- // Because this is a lint check, we take a pessimistic approach and warn if
- // the control flow is potentially incorrect.
-
- SmallSet<BasicBlock *, 4> VisitedBlocks;
- BasicBlock *CatchBB = II->getParent();
-
- // The begin catch must occur in a landing pad block or all paths
- // to it must have come from a landing pad.
- Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
- "llvm.eh.begincatch may be reachable without passing a landingpad",
- II);
-
- // Reset the visited block list.
- VisitedBlocks.clear();
-
- IntrinsicInst *SecondBeginCatch = nullptr;
-
- // This has to be called before it is asserted. Otherwise, the first assert
- // below can never be hit.
- bool EndCatchFound = allSuccessorsReachEndCatch(
- CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
- &SecondBeginCatch, VisitedBlocks);
- Assert(
- SecondBeginCatch == nullptr,
- "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
- II, SecondBeginCatch);
- Assert(EndCatchFound,
- "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
- II);
-}
-
-static bool allPredCameFromBeginCatch(
- BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
- IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- // Look for a begincatch in this block.
- for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
- ++RI) {
- IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
- return true;
- // If we find another end catch before we find a begin catch, that's
- // an error.
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
- *SecondEndCatch = IC;
- return false;
- }
- // If we encounter a landingpad instruction, the search failed.
- if (isa<LandingPadInst>(*RI))
- return false;
- }
- // If, while searching, we find a block with no predecessors,
- // the search failed.
- if (pred_empty(BB))
- return false;
- // Search any predecessors we haven't seen before.
- for (BasicBlock *Pred : predecessors(BB)) {
- if (VisitedBlocks.count(Pred))
- continue;
- if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
- VisitedBlocks))
- return false;
- }
- return true;
-}
-
-void Lint::visitEHEndCatch(IntrinsicInst *II) {
- // The check in this function makes a potentially dubious assumption about
- // the CFG, namely that any block involved in a catch is only used for the
- // catch. This will very likely be true of IR generated by a front end,
- // but it may cease to be true, for example, if the IR is run through a
- // pass which combines similar blocks.
- //
- // In general, if we encounter a block that isn't post-dominated by the
- // end catch block while we are searching the end catch block's predecessors
- // for a call to the begin catch intrinsic, then it is possible that it will
- // be legal for a path to reach the end catch block without ever having
- // called llvm.eh.begincatch.
- //
- // What is actually required is that no path is possible at runtime that
- // reaches a call to llvm.eh.endcatch without having previously visited
- // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
- // reality these calls will be removed before code generation).
- //
- // Because this is a lint check, we take a pessimistic approach and warn if
- // the control flow is potentially incorrect.
-
- BasicBlock *EndCatchBB = II->getParent();
-
- // All paths to the end catch call must pass through a begin catch call.
-
- // If llvm.eh.begincatch wasn't called in the current block, we'll use this
- // helper to recursively look for it in predecessors.
- SmallSet<BasicBlock *, 4> VisitedBlocks;
- IntrinsicInst *SecondEndCatch = nullptr;
-
- // This has to be called before it is asserted. Otherwise, the first assert
- // below can never be hit.
- bool BeginCatchFound =
- allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
- &SecondEndCatch, VisitedBlocks);
- Assert(
- SecondEndCatch == nullptr,
- "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
- II, SecondEndCatch);
- Assert(BeginCatchFound,
- "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
- II);
-}
-
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
AssumptionCache *AC) {
// Assume undef could be zero.
@@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
"Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getOperand(2), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(I.getType()->getNumElements()),
"Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert(&I == I.getParent()->begin() ||
- std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ Assert(&I == &I.getParent()->front() ||
+ std::prev(I.getIterator())->mayHaveSideEffects(),
"Unusual: unreachable immediately preceded by instruction without "
"side effects",
&I);
@@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
SmallPtrSet<Value *, 4> Visited;
- return findValueImpl(V, DL, OffsetOk, Visited);
+ return findValueImpl(V, OffsetOk, Visited);
}
/// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
if (!Visited.insert(V).second)
@@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
// TODO: Look through eliminable cast pairs.
// TODO: Look through calls with unique return values.
// TODO: Look through vector insert/extract/shuffle.
- V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
+ V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
if (LoadInst *L = dyn_cast<LoadInst>(V)) {
- BasicBlock::iterator BBI = L;
+ BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
for (;;) {
if (!VisitedBlocks.insert(BB).second)
break;
- if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
- BB, BBI, 6, AA))
- return findValueImpl(U, DL, OffsetOk, Visited);
+ if (Value *U =
+ FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, DefMaxInstsToScan, AA))
+ return findValueImpl(U, OffsetOk, Visited);
if (BBI != BB->begin()) break;
BB = BB->getUniquePredecessor();
if (!BB) break;
@@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (Value *W = PN->hasConstantValue())
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
- if (CI->isNoopCast(DL))
- return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
+ if (CI->isNoopCast(*DL))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
Ex->getIndices()))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// Same as above, but for ConstantExpr instead of Instruction.
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
CE->getOperand(0)->getType(), CE->getType(),
- DL.getIntPtrType(V->getType())))
- return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
+ DL->getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
- return findValueImpl(W, DL, OffsetOk, Visited);
+ if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+ return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
+ if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
}
return V;
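
findValueImpl, rewritten above to pull the DataLayout from the pass member, keeps one invariant regardless of which simplification fires: every step recurses on the simpler value, and a visited set terminates self-referential chains. A stripped-down sketch of that control shape, independent of the LLVM API (the Value type here is a toy stand-in):

#include <unordered_set>

// Toy stand-in for llvm::Value: each node optionally points at a simpler
// equivalent (the result of a cast strip, load forwarding, PHI collapse,
// constant fold, ...).
struct Value {
  Value *Simpler = nullptr;
};

Value *findValueImpl(Value *V, std::unordered_set<Value *> &Visited) {
  // Detect self-referential values: revisiting a node ends the walk.
  if (!Visited.insert(V).second)
    return V;
  Value *W = V->Simpler;
  if (W && W != V)
    return findValueImpl(W, Visited);
  return V;
}

Value *findValue(Value *V) {
  std::unordered_set<Value *> Visited;
  return findValueImpl(V, Visited);
}
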
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 624c5a1..4b2fa3c 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// from/to. If so, the previous load or store would have already trapped,
// so there is no harm doing an extra load (also, CSE will later eliminate
// the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ BasicBlock::iterator BBI = ScanFrom->getIterator(),
+ E = ScanFrom->getParent()->begin();
// We can at least always strip pointer casts even though we can't use the
// base here.
@@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
return false;
}
+/// DefMaxInstsToScan - the default maximum number of instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+/// FindAvailableLoadedValue() was introduced in r60148 to improve jump
+/// threading in part by eliminating partially redundant loads.
+/// At that point, the value of MaxInstsToScan was already set to '6'
+/// without documented explanation.
+cl::opt<unsigned>
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+ cl::desc("Use this to specify the default maximum number of instructions "
+ "to scan backward from a given instruction, when searching for "
+ "available loaded value"));
+
/// \brief Scan the ScanBB block backwards to see if we have the value at the
/// memory address *Ptr locally available within a small number of instructions.
///
@@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
// would affect codegen).
- Instruction *Inst = --ScanFrom;
+ Instruction *Inst = &*--ScanFrom;
if (isa<DbgInfoIntrinsic>(Inst))
continue;
@@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
- if (AA &&
- (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
- AliasAnalysis::Mod) == 0)
+ if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
continue;
// Otherwise the store may or may not alias the pointer; bail out.
@@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
- AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
continue;
// May modify the pointer, bail out.
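
The scan in FindAvailableLoadedValue, as adjusted above, is a bounded backward walk: step back one instruction at a time, skip debug intrinsics without charging the budget, and give up once the limit (now the DefMaxInstsToScan option) is spent. A schematic version over a flat instruction list, with stand-in kinds instead of LLVM instruction classes:

#include <vector>

enum class Kind { DebugInfo, LoadFromPtr, MayWritePtr, Other };
struct Inst {
  Kind K;
};

// Scan backwards from Pos for an instruction that makes the loaded value
// available; give up after Budget non-debug instructions or on a possible
// clobber. Returns the index of the providing instruction, or -1.
int findAvailable(const std::vector<Inst> &BB, int Pos, unsigned Budget) {
  while (Pos > 0) {
    const Inst &I = BB[--Pos];
    if (I.K == Kind::DebugInfo)
      continue; // Not counted, so debug info cannot affect codegen.
    if (Budget-- == 0)
      return -1; // Scan limit exhausted (cf. available-load-scan-limit).
    if (I.K == Kind::LoadFromPtr)
      return Pos; // The value is available here.
    if (I.K == Kind::MayWritePtr)
      return -1; // A potential clobber ends the search.
  }
  return -1;
}
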
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index becbae4..d7896ad 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
- "max-interesting-dependences", cl::Hidden,
- cl::desc("Maximum number of interesting dependences collected by "
- "loop-access analysis (default = 100)"),
- cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+ MaxDependences("max-dependences", cl::Hidden,
+ cl::desc("Maximum number of dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
@@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {
return V;
}
-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr, Value *OrigPtr) {
-
- const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+ const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
// symbolic stride replaced by one.
@@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
ValueToValueMap RewriteMap;
RewriteMap[StrideVal] = One;
- const SCEV *ByOne =
- SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
- DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ ScalarEvolution *SE = PSE.getSE();
+ const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+ const auto *CT =
+ static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+ PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+ auto *Expr = PSE.getSCEV(Ptr);
+
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
<< "\n");
- return ByOne;
+ return Expr;
}
// Otherwise, just return the SCEV of the original pointer.
- return SE->getSCEV(Ptr);
+ return OrigSCEV;
}
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides) {
+ const ValueToValueMap &Strides,
+ PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
+ ScalarEvolution *SE = PSE.getSE();
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+ const SCEV *ScStart = AR->getStart();
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
- Sc);
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // For expressions with negative step, the upper bound is ScStart and the
+ // lower bound is ScEnd.
+ if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+ if (CStep->getValue()->isNegative())
+ std::swap(ScStart, ScEnd);
+ } else {
+ // Fallback case: the step is not constant, but we can still
+ // get the upper and lower bounds of the interval by using min/max
+ // expressions.
+ ScStart = SE->getUMinExpr(ScStart, ScEnd);
+ ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+ }
+
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+}
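
The endpoint selection added to insert() above is simple interval arithmetic: for an affine address Start + i*Step over i in [0, BTC], a negative step makes the evolution run downwards, so the endpoints swap; if the step's sign is unknown, min/max of the two candidates is the conservative answer. A small sketch with plain integers, ignoring the overflow concerns SCEV handles for real:

#include <algorithm>
#include <cstdint>

struct Range {
  int64_t Low, High;
};

// Interval touched by Start + i*Step for i in [0, BTC]; with a negative
// step the upper bound is the old start, mirroring the swap above.
Range accessRange(int64_t Start, int64_t Step, int64_t BTC) {
  int64_t End = Start + Step * BTC;
  if (Step < 0)
    std::swap(Start, End);
  return {Start, End};
}

// Step sign unknown: fall back to min/max of the candidate endpoints,
// the counterpart of the umin/umax expressions above.
Range accessRangeUnknownSign(int64_t A, int64_t B) {
  return {std::min(A, B), std::max(A, B)};
}
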
+
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+ SmallVector<PointerCheck, 4> Checks;
+
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+ if (needsChecking(CGI, CGJ))
+ Checks.push_back(std::make_pair(&CGI, &CGJ));
+ }
+ }
+ return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ assert(Checks.empty() && "Checks is not empty");
+ groupChecks(DepCands, UseDependencies);
+ Checks = generateChecks();
}
-bool RuntimePointerChecking::needsChecking(
- const CheckingPtrGroup &M, const CheckingPtrGroup &N,
- const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const {
for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
- if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ if (needsChecking(M.Members[I], N.Members[J]))
return true;
return false;
}
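
generateChecks() above enumerates unordered pairs of checking groups and keeps those the predicate flags, so at most |groups| choose 2 runtime checks are emitted. The same enumeration, abstracted over the predicate:

#include <utility>
#include <vector>

// Enumerate unordered group pairs (I, J) with I < J, keeping those for
// which the caller-supplied predicate requests a runtime check.
template <typename NeedsCheckFn>
std::vector<std::pair<unsigned, unsigned>>
generatePairChecks(unsigned NumGroups, NeedsCheckFn NeedsChecking) {
  std::vector<std::pair<unsigned, unsigned>> Checks;
  for (unsigned I = 0; I < NumGroups; ++I)
    for (unsigned J = I + 1; J < NumGroups; ++J)
      if (NeedsChecking(I, J))
        Checks.emplace_back(I, J);
  return Checks;
}
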
@@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(
CheckingGroups.clear();
+ // If we need to check two pointers to the same underlying object
+ // with a non-constant difference, we shouldn't perform any pointer
+ // grouping with those pointers. This is because we can easily get
+ // into cases where the resulting check would return false, even when
+ // the accesses are safe.
+ //
+ // The following example shows this:
+ // for (i = 0; i < 1000; ++i)
+ // a[5000 + i * m] = a[i] + a[i + 9000]
+ //
+ // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+ // (0, 10000) which is always false. However, if m is 1, there is no
+ // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+ // us to perform an accurate check in this case.
+ //
+ // The above case requires that we have an UnknownDependence between
+ // accesses to the same underlying object. This cannot happen unless
+ // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // is also false. In this case we will use the fallback path and create
+ // separate checking groups for all pointers.
+
// If we don't have the dependency partitions, construct a new
- // checking pointer group for each pointer.
+ // checking pointer group for each pointer. This is also required
+ // for correctness, because in this case we can have checking between
+ // pointers to the same underlying object.
if (!UseDependencies) {
for (unsigned I = 0; I < Pointers.size(); ++I)
CheckingGroups.push_back(CheckingPtrGroup(I, *this));
@@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(
// don't process them twice.
SmallSet<unsigned, 2> Seen;
- // Go through all equivalence classes, get the the "pointer check groups"
+ // Go through all equivalence classes, get the "pointer check groups"
// and add them to the overall solution. We use the order in which accesses
// appear in 'Pointers' to enforce determinism.
for (unsigned I = 0; I < Pointers.size(); ++I) {
@@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks(
}
}
-bool RuntimePointerChecking::needsChecking(
- unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::arePointersInSamePartition(
+ const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
+ unsigned PtrIdx2) {
+ return (PtrToPartition[PtrIdx1] != -1 &&
+ PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
+}
+
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
const PointerInfo &PointerI = Pointers[I];
const PointerInfo &PointerJ = Pointers[J];
@@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking(
if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
- // If PtrPartition is set omit checks between pointers of the same partition.
- // Partition number -1 means that the pointer is used in multiple partitions.
- // In this case we can't omit the check.
- if (PtrPartition && (*PtrPartition)[I] != -1 &&
- (*PtrPartition)[I] == (*PtrPartition)[J])
- return false;
-
return true;
}
-void RuntimePointerChecking::print(
- raw_ostream &OS, unsigned Depth,
- const SmallVectorImpl<int> *PtrPartition) const {
-
- OS.indent(Depth) << "Run-time memory checks:\n";
-
+void RuntimePointerChecking::printChecks(
+ raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+ unsigned Depth) const {
unsigned N = 0;
- for (unsigned I = 0; I < CheckingGroups.size(); ++I)
- for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
- OS.indent(Depth) << "Check " << N++ << ":\n";
- OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
-
- for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
- OS.indent(Depth + 2)
- << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
- << "\n";
- }
+ for (const auto &Check : Checks) {
+ const auto &First = Check.first->Members, &Second = Check.second->Members;
- OS.indent(Depth + 2) << "Against group " << J << ":\n";
+ OS.indent(Depth) << "Check " << N++ << ":\n";
- for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
- OS.indent(Depth + 2)
- << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
- << "\n";
- }
- }
+ OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+ for (unsigned K = 0; K < First.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
- OS.indent(Depth) << "Grouped accesses:\n";
- for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
- OS.indent(Depth + 2) << "Group " << I << ":\n";
- OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
- << " High: " << *CheckingGroups[I].High << ")\n";
- for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
- OS.indent(Depth + 6) << "Member: "
- << *Pointers[CheckingGroups[I].Members[J]].Expr
- << "\n";
- }
+ OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+ for (unsigned K = 0; K < Second.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
}
}
-unsigned RuntimePointerChecking::getNumberOfChecks(
- const SmallVectorImpl<int> *PtrPartition) const {
-
- unsigned NumPartitions = CheckingGroups.size();
- unsigned CheckCount = 0;
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
- for (unsigned I = 0; I < NumPartitions; ++I)
- for (unsigned J = I + 1; J < NumPartitions; ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
- CheckCount++;
- return CheckCount;
-}
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ printChecks(OS, Checks, Depth);
-bool RuntimePointerChecking::needsAnyChecking(
- const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ const auto &CG = CheckingGroups[I];
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
- return true;
- return false;
+ OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+ << ")\n";
+ for (unsigned J = 0; J < CG.Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+ << "\n";
+ }
+ }
}
namespace {
@@ -390,9 +424,10 @@ public:
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
- MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
- IsRTCheckAnalysisNeeded(false) {}
+ MemoryDepChecker::DepCandidates &DA,
+ PredicatedScalarEvolution &PSE)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
+ PSE(PSE) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -435,7 +470,7 @@ public:
/// We decided that no dependence analysis would be used. Reset the state.
void resetDepChecks(MemoryDepChecker &DepChecker) {
CheckDeps.clear();
- DepChecker.clearInterestingDependences();
+ DepChecker.clearDependences();
}
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
@@ -477,14 +512,18 @@ private:
/// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
/// while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
+
+ /// The SCEV predicate containing all the SCEV-related assumptions.
+ PredicatedScalarEvolution &PSE;
};
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE,
- const ValueToValueMap &Strides, Value *Ptr) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
+ const ValueToValueMap &Strides, Value *Ptr,
+ Loop *L) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;
@@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
(!ShouldCheckStride ||
- isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
if (NeedRTCheck && CanDoRT)
- RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+ RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
- DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
<< " pointer comparisons.\n");
RtCheck.Need = NeedRTCheck;
@@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
+ // nullptr never aliases; don't join sets for pointers that have "null"
+ // in their UnderlyingObjects list.
+ if (isa<ConstantPointerNull>(UnderlyingObj))
+ continue;
+
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
if (Prev != ObjToLastAccess.end())
@@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+ const Loop *Lp, const ValueToValueMap &StridesMap) {
+ Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
+ auto *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
}
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
@@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+ bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Check the step is constant.
- const SCEV *Step = AR->getStepRecurrence(*SE);
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
// Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
- const APInt &APStepVal = C->getValue()->getValue();
+ const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
llvm_unreachable("unexpected DepType!");
}
-bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+bool MemoryDepChecker::Dependence::isBackward() const {
switch (Type) {
case NoDep:
case Forward:
+ case ForwardButPreventsForwarding:
+ case Unknown:
return false;
case BackwardVectorizable:
- case Unknown:
- case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return true;
@@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ return isBackward() || Type == Unknown;
+}
+
+bool MemoryDepChecker::Dependence::isForward() const {
switch (Type) {
- case NoDep:
case Forward:
case ForwardButPreventsForwarding:
- return false;
+ return true;
+ case NoDep:
case Unknown:
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return true;
+ return false;
}
llvm_unreachable("unexpected DepType!");
}
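
The renamed predicates partition the one DepType enum three ways: forward, backward, and neither, with Unknown counted only by the conservative isPossiblyBackward. Restating the classification as free functions over the same enumerators (a standalone paraphrase, not the member functions themselves):

enum DepType {
  NoDep,
  Forward,
  ForwardButPreventsForwarding,
  Unknown,
  BackwardVectorizable,
  Backward,
  BackwardVectorizableButPreventsForwarding
};

bool isBackward(DepType T) {
  return T == BackwardVectorizable || T == Backward ||
         T == BackwardVectorizableButPreventsForwarding;
}

bool isForward(DepType T) {
  return T == Forward || T == ForwardButPreventsForwarding;
}

// Unknown must be treated as possibly backward.
bool isPossiblyBackward(DepType T) { return isBackward(T) || T == Unknown; }
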
@@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
- const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+ const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr);
- int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+ int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
@@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::swap(StrideAPtr, StrideBPtr);
}
- const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+ const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
+ << "(Induction step: " << StrideAPtr << ")\n");
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
// Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
@@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
// Negative distances are not plausible dependencies.
- const APInt &Val = C->getValue()->getValue();
+ const APInt &Val = C->getAPInt();
if (Val.isNegative()) {
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
if (IsTrueDataDependence &&
@@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
if (ATy == BTy)
- return Dependence::NoDep;
+ return Dependence::Forward;
DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
return Dependence::Unknown;
}
@@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
isDependent(*A.first, A.second, *B.first, B.second, Strides);
SafeForVectorization &= Dependence::isSafeForVectorization(Type);
- // Gather dependences unless we accumulated MaxInterestingDependence
+ // Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
// unsafe dependence. This puts a limit on this quadratic
// algorithm.
- if (RecordInterestingDependences) {
- if (Dependence::isInterestingDependence(Type))
- InterestingDependences.push_back(
- Dependence(A.second, B.second, Type));
-
- if (InterestingDependences.size() >= MaxInterestingDependence) {
- RecordInterestingDependences = false;
- InterestingDependences.clear();
+ if (RecordDependences) {
+ if (Type != Dependence::NoDep)
+ Dependences.push_back(Dependence(A.second, B.second, Type));
+
+ if (Dependences.size() >= MaxDependences) {
+ RecordDependences = false;
+ Dependences.clear();
DEBUG(dbgs() << "Too many dependences, stopped recording\n");
}
}
- if (!RecordInterestingDependences && !SafeForVectorization)
+ if (!RecordDependences && !SafeForVectorization)
return false;
}
++OI;
@@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
}
- DEBUG(dbgs() << "Total Interesting Dependences: "
- << InterestingDependences.size() << "\n");
+ DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return SafeForVectorization;
}
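
The MaxDependences cap above trades memory for precision: dependences are recorded only while the count stays under the cap; once it is hit, the list is discarded and the analysis degrades to a plain safe/unsafe verdict. The bounded-recording pattern in isolation:

#include <vector>

// Record items until a cap is reached; past the cap, stop recording and
// drop what was collected, leaving only the caller's boolean verdict.
template <typename T>
struct BoundedLog {
  std::vector<T> Items;
  bool Recording = true;
  unsigned Cap;

  explicit BoundedLog(unsigned C) : Cap(C) {}

  void push(const T &Item) {
    if (!Recording)
      return;
    Items.push_back(Item);
    if (Items.size() >= Cap) {
      Recording = false;
      Items.clear(); // Too many dependences: stop recording.
    }
  }
};
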
@@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() {
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(LoopAccessReport() <<
- "could not determine number of loop iterations");
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport()
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) {
- emitAnalysis(LoopAccessReport(it) <<
+ emitAnalysis(LoopAccessReport(&*it) <<
"instruction cannot be vectorized");
CanVecMem = false;
return;
@@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, LI, DependentAccesses);
+ AA, LI, DependentAccesses, PSE);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRTIfNeeded =
- Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides);
if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
@@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
PtrRtChecking.reset();
PtrRtChecking.Need = true;
+ auto *SE = PSE.getSE();
CanDoRTIfNeeded =
Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
@@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
}
bool LoopAccessInfo::isUniform(Value *V) const {
- return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+ return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop));
}
// FIXME: this function is currently a duplicate of the one in
@@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
return nullptr;
}
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
- Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtChecking.Need)
- return std::make_pair(nullptr, nullptr);
+namespace {
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
+} // end anonymous namespace
- SmallVector<TrackingVH<Value>, 2> Starts;
- SmallVector<TrackingVH<Value>, 2> Ends;
+/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds
+expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+ Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
+ const RuntimePointerChecking &PtrRtChecking) {
+ Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
+ return {Ptr, Ptr};
+ } else {
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
+
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
+ }
+}
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, DL, "induction");
- Instruction *FirstInst = nullptr;
+/// \brief Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
+ Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
+ const RuntimePointerChecking &PtrRtChecking) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ std::transform(
+ PointerChecks.begin(), PointerChecks.end(),
+ std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ PointerBounds
+ First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
+ Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
- for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
- const RuntimePointerChecking::CheckingPtrGroup &CG =
- PtrRtChecking.CheckingGroups[i];
- Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
- << "\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
- Value *Start = nullptr, *End = nullptr;
-
- DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
- DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
+ Instruction *Loc,
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
+ const {
+ auto *SE = PSE.getSE();
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *FirstInst = nullptr;
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
- for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
- const RuntimePointerChecking::CheckingPtrGroup &CGI =
- PtrRtChecking.CheckingGroups[i];
- const RuntimePointerChecking::CheckingPtrGroup &CGJ =
- PtrRtChecking.CheckingGroups[j];
-
- if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
- continue;
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
}
+ MemoryRuntimeCheck = IsConflict;
}
if (!MemoryRuntimeCheck)
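
[Editor's sketch] The comparisons built above implement the interval-overlap test spelled out in the comment: two ranges conflict iff start(A) <= end(B) and start(B) <= end(A), and the per-pair results are OR-reduced into one conflict flag. A standalone sketch of the same logic on plain integers (Range and the helper names are illustrative):

    #include <cstdint>
    #include <utility>
    #include <vector>

    using Range = std::pair<std::uint64_t, std::uint64_t>; // [start, end]

    // Mirrors the bound0/bound1 ICmpULE pair above.
    bool mayConflict(const Range &A, const Range &B) {
      return A.first <= B.second && B.first <= A.second;
    }

    // Mirrors the conflict.rdx chain of CreateOr calls.
    bool anyConflict(const std::vector<std::pair<Range, Range>> &Checks) {
      bool Conflict = false;
      for (const auto &C : Checks)
        Conflict |= mayConflict(C.first, C.second);
      return Conflict;
    }
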
@@ -1661,12 +1737,20 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
return std::make_pair(FirstInst, Check);
}
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
+ if (!PtrRtChecking.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
+}
+
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
@@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
- if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
- OS.indent(Depth) << "Interesting Dependences:\n";
- for (auto &Dep : *InterestingDependences) {
+ if (auto *Dependences = DepChecker.getDependences()) {
+ OS.indent(Depth) << "Dependences:\n";
+ for (auto &Dep : *Dependences) {
Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
OS << "\n";
}
} else
- OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+ OS.indent(Depth) << "Too many dependences, not recorded\n";
  // List the pairs of accesses that need run-time checks to prove independence.
PtrRtChecking.print(OS, Depth);
@@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Store to invariant address was "
<< (StoreToLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
+
+ OS.indent(Depth) << "SCEV assumptions:\n";
+ PSE.getUnionPredicate().print(OS, Depth);
}
const LoopAccessInfo &
@@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
- Strides);
+ LAI =
+ llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
#endif
@@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
}
bool LoopAccessAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) {
}
void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"
INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
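
[Editor's sketch] These hunks migrate LoopAccessAnalysis from the old alias-analysis group mechanism (INITIALIZE_AG_DEPENDENCY) to the wrapper-pass scheme. A sketch of the boilerplate a client pass would use after this change, mirroring the getAnalysis calls visible above (MyLAAClient is hypothetical; the headers are the ones used in this era of the tree):

    #include "llvm/Analysis/AliasAnalysis.h"   // AAResultsWrapperPass
    #include "llvm/Analysis/ScalarEvolution.h" // ScalarEvolutionWrapperPass
    #include "llvm/Pass.h"

    using namespace llvm;

    struct MyLAAClient : public FunctionPass {
      static char ID;
      MyLAAClient() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AAResultsWrapperPass>();
        AU.addRequired<ScalarEvolutionWrapperPass>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
        (void)AA; (void)SE; // a real client would query these
        return false;
      }
    };
    char MyLAAClient::ID = 0;
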
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 6b6faf8..9ab9eea 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
return false;
if (I->mayReadFromMemory())
return false;
- // The landingpad instruction is immobile.
- if (isa<LandingPadInst>(I))
+ // EH block instructions are immobile.
+ if (I->isEHPad())
return false;
// Determine the insertion point, unless one was given.
if (!InsertPt) {
@@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Hoist.
I->moveBefore(InsertPt);
+
+  // There is a possibility of hoisting this instruction above some arbitrary
+ // condition. Any metadata defined on it can be control dependent on this
+ // condition. Conservatively strip it here so that we don't give any wrong
+ // information to the optimizer.
+ I->dropUnknownNonDebugMetadata();
+
Changed = true;
return true;
}
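
[Editor's sketch] The new dropUnknownNonDebugMetadata call addresses a subtle soundness issue: once an instruction is hoisted above the condition that used to guard it, metadata such as !range or !nonnull may only have been valid under that condition. A hypothetical helper showing the paired calls (hoistBefore is not from the tree; both member functions are the ones used in the hunk):

    #include "llvm/IR/Instruction.h"

    // Move I in front of InsertPt and conservatively drop metadata that may
    // have been control dependent on a branch I was hoisted above.
    static void hoistBefore(llvm::Instruction *I, llvm::Instruction *InsertPt) {
      I->moveBefore(InsertPt);
      I->dropUnknownNonDebugMetadata();
    }
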
@@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
bool Loop::isLCSSAForm(DominatorTree &DT) const {
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+      // optimizations, so for the purposes of considering LCSSA form, we
+ // can ignore them.
+ if (I->getType()->isTokenTy())
+ continue;
+
for (Use &U : I->uses()) {
Instruction *UI = cast<Instruction>(U.getUser());
BasicBlock *UserBB = UI->getParent();
@@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
DT.isReachableFromEntry(UserBB))
return false;
}
+ }
}
return true;
}
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
+ if (!isLCSSAForm(DT))
+ return false;
+
+ return std::all_of(begin(), end(), [&](const Loop *L) {
+ return L->isRecursivelyLCSSAForm(DT);
+ });
+}
+
/// isLoopSimplifyForm - Return true if the Loop is in the form that
/// the LoopSimplify form transforms loops to, which is sometimes called
/// normal form.
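
[Editor's sketch] The new isRecursivelyLCSSAForm above is a textbook recursive all-of: the property holds for a loop iff it holds locally and for every subloop. A standalone sketch of the shape (Node and recursivelyOK are illustrative):

    #include <algorithm>
    #include <vector>

    struct Node {
      bool LocalOK = true;
      std::vector<Node> SubLoops;

      bool recursivelyOK() const {
        if (!LocalOK)
          return false; // fail fast before recursing
        return std::all_of(SubLoops.begin(), SubLoops.end(),
                           [](const Node &N) { return N.recursivelyOK(); });
      }
    };
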
@@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const {
if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
if (II->cannotDuplicate())
return false;
+ // Return false if any loop blocks contain invokes to EH-pads other than
+ // landingpads; we don't know how to split those edges yet.
+ auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI))
+ return false;
+ }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
if (CI->cannotDuplicate())
return false;
}
+ if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I))
+ return false;
}
}
return true;
@@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-/// updateUnloop - The last backedge has been removed from a loop--now the
-/// "unloop". Find a new parent for the blocks contained within unloop and
-/// update the loop tree. We don't necessarily have valid dominators at this
-/// point, but LoopInfo is still valid except for the removal of this loop.
-///
-/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
-/// checking first is illegal.
+LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+ analyze(DomTree);
+}
+
void LoopInfo::updateUnloop(Loop *Unloop) {
+ Unloop->markUnlooped();
// First handle the special case of no parent loop to simplify the algorithm.
if (!Unloop->getParentLoop()) {
@@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
// objects. I don't want to add that kind of complexity until the scope of
// the problem is better understood.
LoopInfo LI;
- LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));
return LI;
}
@@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
+PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
+PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner)
+ : OS(OS), Banner(Banner) {}
+
+PreservedAnalyses PrintLoopPass::run(Loop &L) {
+ OS << Banner;
+ for (auto *Block : L.blocks())
+ if (Block)
+ Block->print(OS);
+ else
+ OS << "Printing <null> block";
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
bool LoopInfoWrapperPass::runOnFunction(Function &) {
releaseMemory();
- LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
return false;
}
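
[Editor's sketch] Besides the Analyze-to-analyze rename, the diff adds a LoopInfo constructor taking a dominator tree, so callers no longer need the two-step construct-then-analyze dance. A minimal sketch of the new usage, assuming F is a valid llvm::Function (buildLoopInfo is hypothetical):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"

    void buildLoopInfo(llvm::Function &F) {
      llvm::DominatorTree DT(F); // recalculates dominators for F
      llvm::LoopInfo LI(DT);     // the new ctor runs analyze(DT)
    }
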
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index e9fcf02..dc42473 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -27,35 +28,26 @@ namespace {
/// PrintLoopPass - Print a Function corresponding to a Loop.
///
-class PrintLoopPass : public LoopPass {
-private:
- std::string Banner;
- raw_ostream &Out; // raw_ostream to print on.
+class PrintLoopPassWrapper : public LoopPass {
+ PrintLoopPass P;
public:
static char ID;
- PrintLoopPass(const std::string &B, raw_ostream &o)
- : LoopPass(ID), Banner(B), Out(o) {}
+ PrintLoopPassWrapper() : LoopPass(ID) {}
+ PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner)
+ : LoopPass(ID), P(OS, Banner) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
bool runOnLoop(Loop *L, LPPassManager &) override {
- Out << Banner;
- for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
- b != be;
- ++b) {
- if (*b)
- (*b)->print(Out);
- else
- Out << "Printing <null> block";
- }
+ P.run(*L);
return false;
}
};
-char PrintLoopPass::ID = 0;
+char PrintLoopPassWrapper::ID = 0;
}
//===----------------------------------------------------------------------===//
@@ -66,81 +58,34 @@ char LPPassManager::ID = 0;
LPPassManager::LPPassManager()
: FunctionPass(ID), PMDataManager() {
- skipThisLoop = false;
- redoThisLoop = false;
LI = nullptr;
CurrentLoop = nullptr;
}
-/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
-void LPPassManager::deleteLoopFromQueue(Loop *L) {
-
- LI->updateUnloop(L);
-
- // Notify passes that the loop is being deleted.
- deleteSimpleAnalysisLoop(L);
-
- // If L is current loop then skip rest of the passes and let
- // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
- // and continue applying other passes on CurrentLoop.
- if (CurrentLoop == L)
- skipThisLoop = true;
-
- delete L;
-
- if (skipThisLoop)
- return;
-
- for (std::deque<Loop *>::iterator I = LQ.begin(),
- E = LQ.end(); I != E; ++I) {
- if (*I == L) {
- LQ.erase(I);
- break;
- }
- }
-}
-
// Insert a loop into the loop nest (LoopInfo) and the loop queue (LQ).
-void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
-
- assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+Loop &LPPassManager::addLoop(Loop *ParentLoop) {
+ // Create a new loop. LI will take ownership.
+ Loop *L = new Loop();
- // Insert into loop nest
- if (ParentLoop)
- ParentLoop->addChildLoop(L);
- else
+ // Insert into the loop nest and the loop queue.
+ if (!ParentLoop) {
+ // This is the top level loop.
LI->addTopLevelLoop(L);
-
- insertLoopIntoQueue(L);
-}
-
-void LPPassManager::insertLoopIntoQueue(Loop *L) {
- // Insert L into loop queue
- if (L == CurrentLoop)
- redoLoop(L);
- else if (!L->getParentLoop())
- // This is top level loop.
LQ.push_front(L);
- else {
- // Insert L after the parent loop.
- for (std::deque<Loop *>::iterator I = LQ.begin(),
- E = LQ.end(); I != E; ++I) {
- if (*I == L->getParentLoop()) {
- // deque does not support insert after.
- ++I;
- LQ.insert(I, 1, L);
- break;
- }
- }
+ return *L;
}
-}
-// Reoptimize this loop. LPPassManager will re-insert this loop into the
-// queue. This allows LoopPass to change loop nest for the loop. This
-// utility may send LPPassManager into infinite loops so use caution.
-void LPPassManager::redoLoop(Loop *L) {
- assert (CurrentLoop == L && "Can redo only CurrentLoop");
- redoThisLoop = true;
+ ParentLoop->addChildLoop(L);
+ // Insert L into the loop queue after the parent loop.
+ for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ return *L;
}
/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
@@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
- CurrentLoop = LQ.back();
- skipThisLoop = false;
- redoThisLoop = false;
-
+ CurrentLoop = LQ.back();
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
@@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) {
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
- skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getName());
+ CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName());
dumpPreservedSet(P);
- if (!skipThisLoop) {
+ if (CurrentLoop->isUnloop()) {
+ // Notify passes that the loop is being deleted.
+ deleteSimpleAnalysisLoop(CurrentLoop);
+ } else {
// Manually check that this loop is still healthy. This is done
// instead of relying on LoopInfo::verifyLoop since LoopInfo
// is a function pass and it's really expensive to verify every
@@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P,
- skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getName(),
+ removeDeadPasses(P, CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
- if (skipThisLoop)
+ if (CurrentLoop->isUnloop())
// Do not run other passes on this loop.
break;
}
@@ -289,17 +235,16 @@ bool LPPassManager::runOnFunction(Function &F) {
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
- if (skipThisLoop)
+ if (CurrentLoop->isUnloop()) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
}
+ delete CurrentLoop;
+ }
// Pop the loop from queue after running all passes.
LQ.pop_back();
-
- if (redoThisLoop)
- LQ.push_back(CurrentLoop);
}
// Finalization
@@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {
Pass *LoopPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
- return new PrintLoopPass(Banner, O);
+ return new PrintLoopPassWrapper(O, Banner);
}
// Check if this pass is suitable for the current LPPassManager, if
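
[Editor's sketch] The refactoring above follows the standard migration pattern of this period: the printing logic moves into a pass-manager-agnostic class (PrintLoopPass) and the legacy pass becomes a thin shim that owns one and forwards to it. A standalone sketch of that wrapper shape (all names illustrative):

    #include <iostream>
    #include <string>
    #include <utility>

    struct Printer { // new-style, pass-manager-agnostic
      std::string Banner;
      void run(const std::string &LoopName) const {
        std::cout << Banner << LoopName << '\n';
      }
    };

    struct LegacyPrinterWrapper { // old-style shim
      Printer P;
      explicit LegacyPrinterWrapper(std::string Banner) : P{std::move(Banner)} {}
      bool runOnLoop(const std::string &LoopName) const {
        P.run(LoopName); // forward to the shared implementation
        return false;    // printing never modifies the IR
      }
    };
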
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index da3b829..078cefe 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -49,7 +49,7 @@ namespace {
void print(raw_ostream &OS, const Module * = nullptr) const override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<MemoryDependenceAnalysis>();
AU.setPreservesAll();
}
@@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
// All this code uses non-const interfaces because MemDep is not
// const-friendly, though nothing is actually modified.
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
Instruction *Inst = &I;
if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
@@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const auto &I : inst_range(*F)) {
+ for (const auto &I : instructions(*F)) {
const Instruction *Inst = &I;
DepSetMap::const_iterator DI = Deps.find(Inst);
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
index fa292a2..36f1424 100644
--- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -22,7 +22,8 @@ using namespace llvm;
namespace {
struct MemDerefPrinter : public FunctionPass {
- SmallVector<Value *, 4> Vec;
+ SmallVector<Value *, 4> Deref;
+ SmallPtrSet<Value *, 4> DerefAndAligned;
static char ID; // Pass identification, replacement for typeid
MemDerefPrinter() : FunctionPass(ID) {
@@ -34,7 +35,8 @@ namespace {
bool runOnFunction(Function &F) override;
void print(raw_ostream &OS, const Module * = nullptr) const override;
void releaseMemory() override {
- Vec.clear();
+ Deref.clear();
+ DerefAndAligned.clear();
}
};
}
@@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() {
bool MemDerefPrinter::runOnFunction(Function &F) {
const DataLayout &DL = F.getParent()->getDataLayout();
- for (auto &I: inst_range(F)) {
+ for (auto &I: instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, DL))
- Vec.push_back(PO);
+ Deref.push_back(PO);
+ if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+ DerefAndAligned.insert(PO);
}
}
return false;
@@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
OS << "The following are dereferenceable:\n";
- for (auto &V: Vec) {
+ for (Value *V: Deref) {
V->print(OS);
+ if (DerefAndAligned.count(V))
+ OS << "\t(aligned)";
+ else
+ OS << "\t(unaligned)";
OS << "\n\n";
}
}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 8ddac8f..b19ecad 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
#define DEBUG_TYPE "memory-builtins"
-enum AllocType {
+enum AllocType : uint8_t {
OpNewLike = 1<<0, // allocates; never returns null
MallocLike = 1<<1 | OpNewLike, // allocates; may return null
CallocLike = 1<<2, // allocates + bzero
@@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = {
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
{LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
+ {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
+ {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long)
+ {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow)
+ {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
+ {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long)
+ {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow)
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
@@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
- unsigned i = 0;
- bool found = false;
- for ( ; i < array_lengthof(AllocationFnData); ++i) {
- if (AllocationFnData[i].Func == TLIFn) {
- found = true;
- break;
- }
- }
- if (!found)
+ const AllocFnsTy *FnData =
+ std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
+ [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; });
+
+ if (FnData == std::end(AllocationFnData))
return nullptr;
- const AllocFnsTy *FnData = &AllocationFnData[i];
if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return nullptr;
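
[Editor's sketch] The hunk above replaces a manual index-and-flag search with std::find_if over the constant table. A self-contained sketch of the same idiom (the struct layout and table contents are placeholders):

    #include <algorithm>
    #include <iterator>

    struct AllocFnsTy { int Func; int AllocTy; };
    static const AllocFnsTy AllocationFnData[] = {{1, 0}, {2, 1}, {3, 2}};

    const AllocFnsTy *getAllocationData(int TLIFn) {
      const AllocFnsTy *FnData =
          std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
                       [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; });
      return FnData == std::end(AllocationFnData) ? nullptr : FnData;
    }
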
@@ -185,13 +188,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
}
/// \brief Tests if a value is a call or invoke to a library function that
-/// reallocates memory (such as realloc).
-bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
- bool LookThroughBitCast) {
- return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
-}
-
-/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
@@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
unsigned ExpectedNumParams;
if (TLIFn == LibFunc::free ||
TLIFn == LibFunc::ZdlPv || // operator delete(void*)
- TLIFn == LibFunc::ZdaPv) // operator delete[](void*)
+ TLIFn == LibFunc::ZdaPv || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*)
ExpectedNumParams = 1;
else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
- TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
+ TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint)
+ TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
return nullptr;
@@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
// always generate code immediately before the instruction being
// processed, so that the generated code dominates the same BBs
- Instruction *PrevInsertPoint = Builder.GetInsertPoint();
+ BuilderTy::InsertPointGuard Guard(Builder);
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
@@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
Result = unknown();
}
- if (PrevInsertPoint)
- Builder.SetInsertPoint(PrevInsertPoint);
-
// Don't reuse CacheIt since it may be invalid at this point.
CacheMap[V] = Result;
return Result;
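
[Editor's sketch] Replacing the manual PrevInsertPoint save/restore with InsertPointGuard is an RAII fix: the guard's destructor restores the insertion point on every exit path, including early returns the hand-written restore could miss. A standalone sketch of the pattern (Builder and computeAt are illustrative):

    struct Builder {
      int InsertPoint = 0;
    };

    class InsertPointGuard {
      Builder &B;
      int Saved;

    public:
      explicit InsertPointGuard(Builder &B) : B(B), Saved(B.InsertPoint) {}
      ~InsertPointGuard() { B.InsertPoint = Saved; } // runs on every exit
    };

    int computeAt(Builder &B, int NewPoint, bool BailEarly) {
      InsertPointGuard Guard(B);
      B.InsertPoint = NewPoint;
      if (BailEarly)
        return -1; // the guard still restores here
      return B.InsertPoint * 2;
    }
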
@@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
// compute offset/size for each PHI incoming pointer
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
- Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());
SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
if (!bothKnown(EdgeData)) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 782a67b..3e80bfe 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -22,7 +22,9 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const unsigned int BlockScanLimit = 100;
+
+static cl::opt<unsigned> BlockScanLimit(
+ "memdep-block-scan-limit", cl::Hidden, cl::init(100),
+ cl::desc("The number of instructions to scan in a block in memory "
+ "dependency analysis (default = 100)"));
// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;
@@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0;
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() {
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return false;
}
@@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
-static AliasAnalysis::ModRefResult
-GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
+static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
+ const TargetLibraryInfo &TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isUnordered()) {
Loc = MemoryLocation::get(LI);
- return AliasAnalysis::Ref;
+ return MRI_Ref;
}
if (LI->getOrdering() == Monotonic) {
Loc = MemoryLocation::get(LI);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
Loc = MemoryLocation();
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isUnordered()) {
Loc = MemoryLocation::get(SI);
- return AliasAnalysis::Mod;
+ return MRI_Mod;
}
if (SI->getOrdering() == Monotonic) {
Loc = MemoryLocation::get(SI);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
Loc = MemoryLocation();
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Loc = MemoryLocation::get(V);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
- if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+ if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
// calls to free() deallocate the entire structure
Loc = MemoryLocation(CI->getArgOperand(0));
- return AliasAnalysis::Mod;
+ return MRI_Mod;
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return AliasAnalysis::Mod;
+ return MRI_Mod;
case Intrinsic::invariant_end:
II->getAAMetadata(AAInfo);
Loc = MemoryLocation(
@@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return AliasAnalysis::Mod;
+ return MRI_Mod;
default:
break;
}
@@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
if (Inst->mayReadFromMemory())
- return AliasAnalysis::Ref;
- return AliasAnalysis::NoModRef;
+ return MRI_Ref;
+ return MRI_NoModRef;
}
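
[Editor's sketch] The AliasAnalysis::ModRefResult values become a free-standing ModRefInfo enum; the surrounding code still treats it as a bitmask, e.g. "!(MR & MRI_Mod)" below. A sketch of the bit layout consistent with those uses (the values illustrate the convention rather than quote the header):

    enum ModRefInfo : unsigned {
      MRI_NoModRef = 0,
      MRI_Ref = 1,                    // may read the location
      MRI_Mod = 2,                    // may write the location
      MRI_ModRef = MRI_Ref | MRI_Mod, // may do both
    };

    // A query is load-like when the instruction cannot write the location.
    inline bool isLoadLike(ModRefInfo MR) { return !(MR & MRI_Mod); }
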
/// getCallSiteDependencyFrom - Private helper for finding the local
@@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
+ Instruction *Inst = &*--ScanIt;
// If this inst is a memory op, get the pointer it accessed
MemoryLocation Loc;
- AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ ModRefInfo MR = GetLocation(Inst, Loc, *TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef)
return MemDepResult::getClobber(Inst);
continue;
}
@@ -224,10 +233,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
switch (AA->getModRefInfo(CS, InstCS)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
// If the two calls are the same, return InstCS as a Def, so that
// CS can be found redundant and eliminated.
- if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ if (isReadOnlyCall && !(MR & MRI_Mod) &&
CS.getInstruction()->isIdenticalToWhenDefined(Inst))
return MemDepResult::getDef(Inst);
@@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// If we could not obtain a pointer for the instruction and the instruction
// touches memory then assume that this is a dependency.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return MemDepResult::getClobber(Inst);
}
@@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst) {
+ if (QueryInst != nullptr) {
+ if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
+ MemDepResult invariantGroupDependency =
+ getInvariantGroupPointerDependency(LI, BB);
+
+ if (invariantGroupDependency.isDef())
+ return invariantGroupDependency;
+ }
+ }
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+}
+
+MemDepResult
+MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI,
+ BasicBlock *BB) {
+ Value *LoadOperand = LI->getPointerOperand();
+  // It is not safe to walk the use list of a global value, because function
+ // passes aren't allowed to look outside their functions.
+ if (isa<GlobalValue>(LoadOperand))
+ return MemDepResult::getUnknown();
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
+ MemDepResult Result = MemDepResult::getUnknown();
+ llvm::SmallSet<Value *, 14> Seen;
+ // Queue to process all pointers that are equivalent to load operand.
+ llvm::SmallVector<Value *, 8> LoadOperandsQueue;
+ LoadOperandsQueue.push_back(LoadOperand);
+ while (!LoadOperandsQueue.empty()) {
+ Value *Ptr = LoadOperandsQueue.pop_back_val();
+ if (isa<GlobalValue>(Ptr))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
+ if (!Seen.count(BCI->getOperand(0))) {
+ LoadOperandsQueue.push_back(BCI->getOperand(0));
+ Seen.insert(BCI->getOperand(0));
+ }
+ }
+
+ for (Use &Us : Ptr->uses()) {
+ auto *U = dyn_cast<Instruction>(Us.getUser());
+ if (!U || U == LI || !DT->dominates(U, LI))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ if (!Seen.count(BCI)) {
+ LoadOperandsQueue.push_back(BCI);
+ Seen.insert(BCI);
+ }
+ continue;
+ }
+      // If we hit a load or store with the same invariant.group metadata (and
+      // the same pointer operand), we can assume that the value pointed to by
+      // the pointer operand didn't change.
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
+ return MemDepResult::getDef(U);
+ }
+ }
+ return Result;
+}
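
[Editor's sketch] The Seen set plus LoadOperandsQueue above is a classic visit-each-node-once worklist over pointers that carry the same value (bitcasts in both directions). A generic standalone sketch of that walk (walkOnce and the functor names are illustrative):

    #include <set>
    #include <vector>

    template <typename T, typename EdgesFn, typename VisitFn>
    void walkOnce(T Root, EdgesFn Edges, VisitFn Visit) {
      std::set<T> Seen = {Root};
      std::vector<T> Queue = {Root};
      while (!Queue.empty()) {
        T Cur = Queue.back();
        Queue.pop_back();
        Visit(Cur);
        for (const T &Next : Edges(Cur))
          if (Seen.insert(Next).second) // enqueue each node at most once
            Queue.push_back(Next);
      }
    }
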
+
+MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
+ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
+ BasicBlock *BB, Instruction *QueryInst) {
+
const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
@@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
const DataLayout &DL = BB->getModule()->getDataLayout();
+ // Create a numbered basic block to lazily compute and cache instruction
+ // positions inside a BB. This is used to provide fast queries for relative
+ // position between two instructions in a BB and can be used by
+ // AliasAnalysis::callCapturesBefore.
+ OrderedBasicBlock OBB(BB);
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
- Instruction *Inst = --ScanIt;
+ Instruction *Inst = &*--ScanIt;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
// Debug intrinsics don't (and can't) cause dependencies.
@@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
- if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef)
continue;
// Ok, this store might clobber the query pointer. Check to see if it is
@@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
// a subsequent bitcast of the malloc call result. There can be stores to
// the malloced memory between the malloc call and its bitcast uses, and we
// need to continue scanning until the malloc call.
- const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
@@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
continue;
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
- if (MR == AliasAnalysis::ModRef)
- MR = AA->callCapturesBefore(Inst, MemLoc, DT);
+ if (MR == MRI_ModRef)
+ MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB);
switch (MR) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
return MemDepResult::getClobber(Inst);
- case AliasAnalysis::Ref:
+ case MRI_Ref:
// If the call is known to never store to the pointer, and if this is a
// load query, we can safely ignore it (scan past it).
if (isLoad)
@@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getNonFuncLocal();
} else {
MemoryLocation MemLoc;
- AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI);
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
- bool isLoad = !(MR & AliasAnalysis::Mod);
+ bool isLoad = !(MR & MRI_Mod);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
- LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
- QueryParent, QueryInst);
+ LocalCache = getPointerDependencyFrom(
+ MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
- QueryParent);
+ LocalCache = getCallSiteDependencyFrom(
+ QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
} else
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
@@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
BasicBlock::iterator ScanPos = DirtyBB->end();
if (ExistingResult) {
if (Instruction *Inst = ExistingResult->getResult().getInst()) {
- ScanPos = Inst;
+ ScanPos = Inst->getIterator();
// We're removing QueryInst's use of Inst.
RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
QueryCS.getInstruction());
@@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
assert(ExistingResult->getResult().getInst()->getParent() == BB &&
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
- ScanPos = ExistingResult->getResult().getInst();
+ ScanPos = ExistingResult->getResult().getInst()->getIterator();
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
@@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// the entire block to get to this point.
MemDepResult NewDirtyVal;
if (!RemInst->isTerminator())
- NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+ NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
@@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
- AA->deleteValue(RemInst);
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
deleted file mode 100644
index 322a9a8..0000000
--- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the default implementation of the Alias Analysis interface
-// that simply returns "I don't know" for all queries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- /// NoAA - This class implements the -no-aa pass, which always returns "I
- /// don't know" for alias queries. NoAA is unlike other alias analysis
- /// implementations, in that it does not chain to a previous analysis. As
- /// such it doesn't follow many of the rules that other alias analyses must.
- ///
- struct NoAA : public ImmutablePass, public AliasAnalysis {
- static char ID; // Class identification, replacement for typeinfo
- NoAA() : ImmutablePass(ID) {
- initializeNoAAPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {}
-
- bool doInitialization(Module &M) override {
- // Note: NoAA does not call InitializeAliasAnalysis because it's
- // special and does not support chaining.
- DL = &M.getDataLayout();
- return true;
- }
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- return MayAlias;
- }
-
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- return UnknownModRefBehavior;
- }
- ModRefBehavior getModRefBehavior(const Function *F) override {
- return UnknownModRefBehavior;
- }
-
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- return false;
- }
- ModRefResult getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) override {
- return ModRef;
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override {
- return ModRef;
- }
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return ModRef;
- }
-
- void deleteValue(Value *V) override {}
- void addEscapingUse(Use &U) override {}
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char NoAA::ID = 0;
-INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
- "No Alias Analysis (always returns 'may' alias)",
- true, true, true)
-
-ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
new file mode 100644
index 0000000..25f660f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -0,0 +1,170 @@
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+/// TODO: Theoretically we could check for dependencies between objc_* calls
+/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
+
+#define DEBUG_TYPE "objc-arc-aa"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableARCOpts)
+ return AAResultBase::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = GetRCIdentityRoot(LocA.Ptr);
+ const Value *SB = GetRCIdentityRoot(LocB.Ptr);
+ AliasResult Result =
+ AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
+ MemoryLocation(SB, LocB.Size, LocB.AATags));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA, DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, DL);
+ if (UA != SA || UB != SB) {
+ Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = GetRCIdentityRoot(Loc.Ptr);
+ if (AAResultBase::pointsToConstantMemory(
+ MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S, DL);
+ if (U != S)
+ return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AAResultBase::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case ARCInstKind::NoopCast:
+ return FMRB_DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AAResultBase::getModRefBehavior(F);
+}
+
+ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableARCOpts)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ switch (GetBasicARCInstKind(CS.getInstruction())) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
+ return MRI_NoModRef;
+ default:
+ break;
+ }
+
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return ObjCARCAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char ObjCARCAA::PassID;
+
+char ObjCARCAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createObjCARCAAWrapperPass() {
+ return new ObjCARCAAWrapperPass();
+}
+
+ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) {
+ initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ObjCARCAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ObjCARCAAResult(
+ M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool ObjCARCAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
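
[Editor's sketch] Both alias() and pointsToConstantMemory() above share a two-step strategy: try a precise query on RC-identity-stripped pointers, then fall back to an imprecise query on the underlying objects, trusting only a NoAlias answer from the second step because climbing can skip past offsets. A standalone sketch of that refinement shape (every name here is a stand-in, with integers playing the role of pointers):

    #include <functional>

    enum Alias { NoAlias, MayAlias };

    Alias refinedAlias(int A, int B,
                       const std::function<Alias(int, int)> &Query,
                       const std::function<int(int)> &Strip,
                       const std::function<int(int)> &Underlying) {
      Alias R = Query(Strip(A), Strip(B));
      if (R != MayAlias)
        return R; // the precise query already decided
      // Only NoAlias is safe to forward: the climbed pointers may differ from
      // the originals by an offset.
      if (Query(Underlying(A), Underlying(B)) == NoAlias)
        return NoAlias;
      return MayAlias;
    }
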
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
new file mode 100644
index 0000000..e3e74aa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -0,0 +1,28 @@
+//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, a
+// library that provides several scalar transformations over the LLVM
+// intermediate representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
+ cl::location(EnableARCOpts),
+ cl::init(true));
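
The cl::opt<bool, true> form above stores the flag in an external global via cl::location, so other translation units can test EnableARCOpts directly without touching the command-line machinery. A minimal sketch of the same pattern with a hypothetical flag:

    #include "llvm/Support/CommandLine.h"

    // External storage: the option writes into this global, which any
    // translation unit may read directly.
    bool EnableMyFeature;
    static llvm::cl::opt<bool, true>
        MyFeatureFlag("enable-my-feature",
                      llvm::cl::desc("enable/disable the feature"),
                      llvm::cl::location(EnableMyFeature),
                      llvm::cl::init(true));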
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
new file mode 100644
index 0000000..133b635
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -0,0 +1,675 @@
+//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are too large to live in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ return OS << "ARCInstKind::Retain";
+ case ARCInstKind::RetainRV:
+ return OS << "ARCInstKind::RetainRV";
+ case ARCInstKind::RetainBlock:
+ return OS << "ARCInstKind::RetainBlock";
+ case ARCInstKind::Release:
+ return OS << "ARCInstKind::Release";
+ case ARCInstKind::Autorelease:
+ return OS << "ARCInstKind::Autorelease";
+ case ARCInstKind::AutoreleaseRV:
+ return OS << "ARCInstKind::AutoreleaseRV";
+ case ARCInstKind::AutoreleasepoolPush:
+ return OS << "ARCInstKind::AutoreleasepoolPush";
+ case ARCInstKind::AutoreleasepoolPop:
+ return OS << "ARCInstKind::AutoreleasepoolPop";
+ case ARCInstKind::NoopCast:
+ return OS << "ARCInstKind::NoopCast";
+ case ARCInstKind::FusedRetainAutorelease:
+ return OS << "ARCInstKind::FusedRetainAutorelease";
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return OS << "ARCInstKind::FusedRetainAutoreleaseRV";
+ case ARCInstKind::LoadWeakRetained:
+ return OS << "ARCInstKind::LoadWeakRetained";
+ case ARCInstKind::StoreWeak:
+ return OS << "ARCInstKind::StoreWeak";
+ case ARCInstKind::InitWeak:
+ return OS << "ARCInstKind::InitWeak";
+ case ARCInstKind::LoadWeak:
+ return OS << "ARCInstKind::LoadWeak";
+ case ARCInstKind::MoveWeak:
+ return OS << "ARCInstKind::MoveWeak";
+ case ARCInstKind::CopyWeak:
+ return OS << "ARCInstKind::CopyWeak";
+ case ARCInstKind::DestroyWeak:
+ return OS << "ARCInstKind::DestroyWeak";
+ case ARCInstKind::StoreStrong:
+ return OS << "ARCInstKind::StoreStrong";
+ case ARCInstKind::CallOrUser:
+ return OS << "ARCInstKind::CallOrUser";
+ case ARCInstKind::Call:
+ return OS << "ARCInstKind::Call";
+ case ARCInstKind::User:
+ return OS << "ARCInstKind::User";
+ case ARCInstKind::IntrinsicUser:
+ return OS << "ARCInstKind::IntrinsicUser";
+ case ARCInstKind::None:
+ return OS << "ARCInstKind::None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
+ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
+ .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
+ .Default(ARCInstKind::CallOrUser);
+
+ // One argument.
+ const Argument *A0 = &*AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease",
+ ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = &*AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_storeWeak", ARCInstKind::StoreWeak)
+ .Case("objc_initWeak", ARCInstKind::InitWeak)
+ .Case("objc_storeStrong", ARCInstKind::StoreStrong)
+ .Default(ARCInstKind::CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_moveWeak", ARCInstKind::MoveWeak)
+ .Case("objc_copyWeak", ARCInstKind::CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // corrupt the state of the pointers they are attempting to
+ // elucidate.
+ .Case("llvm.arc.annotation.topdown.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.topdown.bbend",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbend",
+ ARCInstKind::None)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Anything else.
+ return ARCInstKind::CallOrUser;
+}
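
GetFunctionClass leans on StringSwitch to fold long chains of name comparisons into a single expression. A standalone sketch of the idiom, reduced to a hypothetical two-name classifier:

    #include "llvm/ADT/StringSwitch.h"

    // Minimal StringSwitch sketch: each .Case is checked in order and
    // .Default supplies the fallback, just as in GetFunctionClass above.
    static ARCInstKind classifyByName(llvm::StringRef Name) {
      return llvm::StringSwitch<ARCInstKind>(Name)
          .Case("objc_retain", ARCInstKind::Retain)
          .Case("objc_release", ARCInstKind::Release)
          .Default(ARCInstKind::CallOrUser);
    }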
+
+// A whitelist of intrinsics that we know do not use objc pointers or decrement
+// ref counts.
+static bool isInertIntrinsic(unsigned ID) {
+ // TODO: Make this into a covered switch.
+ switch (ID) {
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::objectsize:
+ case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for:
+ case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda:
+ case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// A whitelist of intrinsics that we know use objc pointers but do not
+// decrement ref counts.
+static bool isUseOnlyIntrinsic(unsigned ID) {
+ // We are conservative: even though these intrinsics are unlikely to touch
+ // reference counts, we whitelist them explicitly for safety.
+ //
+ // TODO: Expand this into a covered switch. There is a lot more here.
+ switch (ID) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine what kind of construct V is.
+ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than a bitcast or a GEP with a pointer operand has
+ // a use of an objc pointer. Bitcasts, GEPs, Selects, and PHIs transfer a
+ // pointer to a subsequent use rather than using it themselves, in this
+ // sense. As a shortcut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // See if we have a function that we know something about.
+ if (const Function *F = CI->getCalledFunction()) {
+ ARCInstKind Class = GetFunctionClass(F);
+ if (Class != ARCInstKind::CallOrUser)
+ return Class;
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (isInertIntrinsic(ID))
+ return ARCInstKind::None;
+ if (isUseOnlyIntrinsic(ID))
+ return ARCInstKind::User;
+ }
+
+ // Otherwise, be conservative.
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ // Otherwise, be conservative.
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::PHI:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::Switch:
+ case Instruction::IndirectBr:
+ case Instruction::Alloca:
+ case Instruction::VAArg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::IntToPtr:
+ case Instruction::FCmp:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return ARCInstKind::User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return ARCInstKind::User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return ARCInstKind::None;
+}
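
A typical call site dispatches on the returned kind. A hedged sketch (the surrounding loop is illustrative, not taken from this file):

    // Classify every instruction in a block and react to retains (sketch).
    for (Instruction &I : BB) {
      switch (GetARCInstKind(&I)) {
      case ARCInstKind::Retain:
        // e.g. try to pair this retain with a later release.
        break;
      case ARCInstKind::None:
        break; // inert for ARC purposes
      default:
        break;
      }
    }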
+
+/// \brief Test if the given class is a kind of user.
+bool llvm::objcarc::IsUser(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::User:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::IntrinsicUser:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::Call:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool llvm::objcarc::IsRetain(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ return true;
+ // Note that we treat objc_retainBlock as not a retain, since it can copy
+ // its block.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
+ // ARCInstKind::RetainBlock may be given a stack argument.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
+ /// It is never safe to tail call objc_autorelease: tail calling it enables
+ /// fast autoreleasing, which can cause the object to be reclaimed from the
+ /// autorelease pool, violating the semantics of __autoreleasing types in
+ /// ARC.
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+///
+/// This means that it *could* interrupt the RV optimization.
+bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
+ switch (Kind) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+
+ // The cases below are conservative.
+
+ // RetainBlock can result in user-defined copy constructors being called,
+ // which implies that releases may occur.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ return true;
+ }
+
+ llvm_unreachable("covered switch isn't covered?");
+}
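
These predicates are designed to be composed on a single classification. A sketch of the kind of query an ARC transform might make (the CallInst handling is illustrative, not taken from this file):

    // Mark a retain-family call as a tail call when that is always safe.
    ARCInstKind K = GetBasicARCInstKind(CI);
    if (IsRetain(K) && IsAlwaysTail(K))
      CI->setTailCall();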
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
new file mode 100644
index 0000000..0f0016f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -0,0 +1,85 @@
+//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the OrderedBasicBlock class. OrderedBasicBlock
+// maintains an interface where clients can query whether one instruction
+// comes before another in a BasicBlock. Since BasicBlock currently lacks a
+// reliable way to query the relative position of instructions, one can use
+// OrderedBasicBlock for such queries. OrderedBasicBlock is lazily built on a
+// source BasicBlock and maintains an internal Instruction -> Position map. An
+// OrderedBasicBlock instance should be discarded whenever the source
+// BasicBlock changes.
+//
+// It's currently used by the CaptureTracker in order to find relative
+// positions of a pair of instructions inside a BasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Instruction.h"
+using namespace llvm;
+
+OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB)
+ : NextInstPos(0), BB(BasicB) {
+ LastInstFound = BB->end();
+}
+
+/// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+/// Cache and number each instruction while walking \p BB.
+bool OrderedBasicBlock::comesBefore(const Instruction *A,
+ const Instruction *B) {
+ const Instruction *Inst = nullptr;
+ assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
+ "Instruction supposed to be in NumberedInsts");
+
+ // Start the search with the instruction found in the last lookup round.
+ auto II = BB->begin();
+ auto IE = BB->end();
+ if (LastInstFound != IE)
+ II = std::next(LastInstFound);
+
+ // Number all instructions up to the point where we find 'A' or 'B'.
+ for (; II != IE; ++II) {
+ Inst = cast<Instruction>(II);
+ NumberedInsts[Inst] = NextInstPos++;
+ if (Inst == A || Inst == B)
+ break;
+ }
+
+ assert(II != IE && "Instruction not found?");
+ assert((Inst == A || Inst == B) && "Should find A or B");
+ LastInstFound = II;
+ return Inst == A;
+}
+
+/// \brief Find out whether \p A dominates \p B, meaning whether \p A
+/// comes before \p B in \p BB. This is a simplification that considers
+/// cached instruction positions and ignores other basic blocks; it is only
+/// relevant for comparing the relative positions of instructions inside \p BB.
+bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
+ assert(A->getParent() == B->getParent() &&
+ "Instructions must be in the same basic block!");
+
+ // First we look up the instructions. If an instruction has not been
+ // numbered yet, find() returns NumberedInsts.end(). If both exist, we
+ // compare the numbers. If only A exists, A must come before B, because we
+ // would have numbered B as well had it come first; symmetrically, if only
+ // B exists, A must come after it. If neither exists, we need to number the
+ // block and cache the results (by calling comesBefore).
+ auto NAI = NumberedInsts.find(A);
+ auto NBI = NumberedInsts.find(B);
+ if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end())
+ return NAI->second < NBI->second;
+ if (NAI != NumberedInsts.end())
+ return true;
+ if (NBI != NumberedInsts.end())
+ return false;
+
+ return comesBefore(A, B);
+}
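
A usage sketch, assuming A and B are instruction pointers in the same, unmodified basic block:

    // Lazily numbers instructions only as far as needed to order A and B.
    OrderedBasicBlock OBB(BB);            // BB is a const BasicBlock *
    bool AFirst = OBB.dominates(A, B);
    // If BB is mutated afterwards, discard OBB: the cached numbering in
    // NumberedInsts would be stale.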
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 8cd8534..f59d267 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -21,6 +21,9 @@
#include <algorithm>
#include <iterator>
#include <set>
+#ifndef NDEBUG
+#include "llvm/Analysis/RegionPrinter.h"
+#endif
using namespace llvm;
@@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_,
calculate(F);
}
+#ifndef NDEBUG
+void RegionInfo::view() { viewRegion(this); }
+
+void RegionInfo::viewOnly() { viewRegionOnly(this); }
+#endif
+
//===----------------------------------------------------------------------===//
// RegionInfoPass implementation
//
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index d7f5109..acb218d 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -20,6 +20,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include "llvm/IR/LegacyPassManager.h"
+#endif
using namespace llvm;
@@ -55,25 +58,22 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
}
};
-template<>
-struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
+template <>
+struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
DOTGraphTraits (bool isSimple = false)
: DOTGraphTraits<RegionNode*>(isSimple) {}
- static std::string getGraphName(RegionInfoPass *DT) {
- return "Region Graph";
- }
+ static std::string getGraphName(const RegionInfo *) { return "Region Graph"; }
- std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) {
- RegionInfo &RI = G->getRegionInfo();
- return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
- reinterpret_cast<RegionNode*>(RI.getTopLevelRegion()));
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode *>::getNodeLabel(
+ Node, reinterpret_cast<RegionNode *>(G->getTopLevelRegion()));
}
std::string getEdgeAttributes(RegionNode *srcNode,
- GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) {
- RegionInfo &RI = G->getRegionInfo();
+ GraphTraits<RegionInfo *>::ChildIteratorType CI,
+ RegionInfo *G) {
RegionNode *destNode = *CI;
if (srcNode->isSubRegion() || destNode->isSubRegion())
@@ -83,7 +83,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
- Region *R = RI.getRegionFor(destBB);
+ Region *R = G->getRegionFor(destBB);
while (R && R->getParent())
if (R->getParent()->getEntry() == destBB)
@@ -91,7 +91,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
else
break;
- if (R->getEntry() == destBB && R->contains(srcBB))
+ if (R && R->getEntry() == destBB && R->contains(srcBB))
return "constraint=false";
return "";
@@ -99,8 +99,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
// Print the cluster of the subregions. This groups the single basic blocks
// and adds a different background color for each group.
- static void printRegionCluster(const Region &R,
- GraphWriter<RegionInfoPass*> &GW,
+ static void printRegionCluster(const Region &R, GraphWriter<RegionInfo *> &GW,
unsigned depth = 0) {
raw_ostream &O = GW.getOStream();
O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
@@ -132,50 +131,81 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
O.indent(2 * depth) << "}\n";
}
- static void addCustomGraphFeatures(const RegionInfoPass* RIP,
- GraphWriter<RegionInfoPass*> &GW) {
- const RegionInfo &RI = RIP->getRegionInfo();
+ static void addCustomGraphFeatures(const RegionInfo *G,
+ GraphWriter<RegionInfo *> &GW) {
raw_ostream &O = GW.getOStream();
O << "\tcolorscheme = \"paired12\"\n";
- printRegionCluster(*RI.getTopLevelRegion(), GW, 4);
+ printRegionCluster(*G->getTopLevelRegion(), GW, 4);
}
};
} //end namespace llvm
namespace {
+struct RegionInfoPassGraphTraits {
+ static RegionInfo *getGraph(RegionInfoPass *RIP) {
+ return &RIP->getRegionInfo();
+ }
+};
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionOnlyPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyPrinter::ID = 0;
+
struct RegionViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, false> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
static char ID;
- RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){
+ RegionViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
initializeRegionViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionViewer::ID = 0;
struct RegionOnlyViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, true> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
static char ID;
- RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) {
+ RegionOnlyViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("regonly", ID) {
initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionOnlyViewer::ID = 0;
-struct RegionPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, false> {
- static char ID;
- RegionPrinter() :
- DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) {
- initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
- }
-};
-char RegionPrinter::ID = 0;
} //end anonymous namespace
INITIALIZE_PASS(RegionPrinter, "dot-regions",
"Print regions of function to 'dot' file", true, true)
+INITIALIZE_PASS(
+ RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file (with no function bodies)", true,
+ true)
+
INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
true, true)
@@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
"View regions of function (with no function bodies)",
true, true)
-namespace {
-
-struct RegionOnlyPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, true> {
- static char ID;
- RegionOnlyPrinter() :
- DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) {
- initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
- }
-};
+FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); }
+FunctionPass *llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
}
-char RegionOnlyPrinter::ID = 0;
-INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
- "Print regions of function to 'dot' file "
- "(with no function bodies)",
- true, true)
-
FunctionPass* llvm::createRegionViewerPass() {
return new RegionViewer();
}
@@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() {
return new RegionOnlyViewer();
}
-FunctionPass* llvm::createRegionPrinterPass() {
- return new RegionPrinter();
+#ifndef NDEBUG
+static void viewRegionInfo(RegionInfo *RI, bool ShortNames) {
+ assert(RI && "Argument must be non-null");
+
+ llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent();
+ std::string GraphName = DOTGraphTraits<RegionInfo *>::getGraphName(RI);
+
+ llvm::ViewGraph(RI, "reg", ShortNames,
+ Twine(GraphName) + " for '" + F->getName() + "' function");
}
-FunctionPass* llvm::createRegionOnlyPrinterPass() {
- return new RegionOnlyPrinter();
+static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) {
+ assert(F && "Argument must be non-null");
+ assert(!F->isDeclaration() && "Function must have an implementation");
+
+ // The viewer and analysis passes do not modify anything, so we can safely
+ // remove the const qualifier.
+ auto NonConstF = const_cast<Function *>(F);
+
+ llvm::legacy::FunctionPassManager FPM(NonConstF->getParent());
+ FPM.add(ViewerPass);
+ FPM.doInitialization();
+ FPM.run(*NonConstF);
+ FPM.doFinalization();
}
+void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); }
+
+void llvm::viewRegion(const Function *F) {
+ invokeFunctionPass(F, createRegionViewerPass());
+}
+
+void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); }
+
+void llvm::viewRegionOnly(const Function *F) {
+ invokeFunctionPass(F, createRegionOnlyViewerPass());
+}
+#endif
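
The NDEBUG-guarded entry points above exist mainly for interactive inspection in a +Asserts build; a sketch of programmatic use (F is assumed to be a defined llvm::Function pointer):

    #ifndef NDEBUG
      llvm::viewRegion(F);     // runs RegionViewer over *F via a pass manager
      llvm::viewRegionOnly(F); // same, without function bodies
    #endif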
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 9c7c175..34074ef 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -83,11 +83,13 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
using namespace llvm;
@@ -114,16 +116,6 @@ static cl::opt<bool>
VerifySCEV("verify-scev",
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
-INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
- "Scalar Evolution Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
- "Scalar Evolution Analysis", false, true)
-char ScalarEvolution::ID = 0;
-
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0;
// Implementation of the SCEV class.
//
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
-#endif
void SCEV::print(raw_ostream &OS) const {
switch (static_cast<SCEVTypes>(getSCEVType())) {
@@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const {
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
- return SC->getValue()->getValue().isNegative();
+ return SC->getAPInt().isNegative();
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
@@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
//===----------------------------------------------------------------------===//
namespace {
- /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
- /// than the complexity of the RHS. This comparator is used to canonicalize
- /// expressions.
- class SCEVComplexityCompare {
- const LoopInfo *const LI;
- public:
- explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
-
- // Return true or false if LHS is less than, or at least RHS, respectively.
- bool operator()(const SCEV *LHS, const SCEV *RHS) const {
- return compare(LHS, RHS) < 0;
- }
-
- // Return negative, zero, or positive, if LHS is less than, equal to, or
- // greater than RHS, respectively. A three-way result allows recursive
- // comparisons to be more efficient.
- int compare(const SCEV *LHS, const SCEV *RHS) const {
- // Fast-path: SCEVs are uniqued so we can do a quick equality check.
- if (LHS == RHS)
- return 0;
-
- // Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
- if (LType != RType)
- return (int)LType - (int)RType;
-
- // Aside from the getSCEVType() ordering, the particular ordering
- // isn't very important except that it's beneficial to be consistent,
- // so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
- case scUnknown: {
- const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
- const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- const Value *LV = LU->getValue(), *RV = RU->getValue();
-
- // Order pointer values after integer values. This helps SCEVExpander
- // form GEPs.
- bool LIsPointer = LV->getType()->isPointerTy(),
- RIsPointer = RV->getType()->isPointerTy();
- if (LIsPointer != RIsPointer)
- return (int)LIsPointer - (int)RIsPointer;
-
- // Compare getValueID values.
- unsigned LID = LV->getValueID(),
- RID = RV->getValueID();
- if (LID != RID)
- return (int)LID - (int)RID;
-
- // Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LV)) {
- const Argument *RA = cast<Argument>(RV);
- unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
- return (int)LArgNo - (int)RArgNo;
- }
-
- // For instructions, compare their loop depth, and their operand
- // count. This is pretty loose.
- if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
- const Instruction *RInst = cast<Instruction>(RV);
-
- // Compare loop depths.
- const BasicBlock *LParent = LInst->getParent(),
- *RParent = RInst->getParent();
- if (LParent != RParent) {
- unsigned LDepth = LI->getLoopDepth(LParent),
- RDepth = LI->getLoopDepth(RParent);
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
-
- // Compare the number of operands.
- unsigned LNumOps = LInst->getNumOperands(),
- RNumOps = RInst->getNumOperands();
- return (int)LNumOps - (int)RNumOps;
- }
+/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+/// than the complexity of the RHS. This comparator is used to canonicalize
+/// expressions.
+class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
- return 0;
- }
+ // Return true if LHS is less than RHS, and false otherwise.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
- case scConstant: {
- const SCEVConstant *LC = cast<SCEVConstant>(LHS);
- const SCEVConstant *RC = cast<SCEVConstant>(RHS);
-
- // Compare constant values.
- const APInt &LA = LC->getValue()->getValue();
- const APInt &RA = RC->getValue()->getValue();
- unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
- if (LBitWidth != RBitWidth)
- return (int)LBitWidth - (int)RBitWidth;
- return LA.ult(RA) ? -1 : 1;
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- case scAddRecExpr: {
- const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
-
- // Compare addrec loop depths.
- const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
- if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(),
- RDepth = RLoop->getLoopDepth();
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
- // Addrec complexity grows with operand count.
- unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
- // Lexicographically compare.
- for (unsigned i = 0; i != LNumOps; ++i) {
- long X = compare(LA->getOperand(i), RA->getOperand(i));
- if (X != 0)
- return X;
- }
+ return 0;
+ }
- return 0;
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scAddExpr:
- case scMulExpr:
- case scSMaxExpr:
- case scUMaxExpr: {
- const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
- const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
-
- // Lexicographically compare n-ary expressions.
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
-
- for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- long X = compare(LC->getOperand(i), RC->getOperand(i));
- if (X != 0)
- return X;
- }
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
}
- case scUDivExpr: {
- const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
- const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- // Lexicographically compare udiv expressions.
- long X = compare(LC->getLHS(), RC->getLHS());
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
if (X != 0)
return X;
- return compare(LC->getRHS(), RC->getRHS());
}
+ return (int)LNumOps - (int)RNumOps;
+ }
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
- const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- // Compare cast expressions by operand.
- return compare(LC->getOperand(), RC->getOperand());
- }
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- llvm_unreachable("Unknown SCEV kind!");
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
}
- };
-}
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+};
+} // end anonymous namespace
/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
@@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
}
-namespace {
-struct FindSCEVSize {
- int Size;
- FindSCEVSize() : Size(0) {}
-
- bool follow(const SCEV *S) {
- ++Size;
- // Keep looking at all operands of S.
- return true;
- }
- bool isDone() const {
- return false;
- }
-};
-}
-
// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
+ struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+ };
+
FindSCEVSize F;
SCEVTraversal<FindSCEVSize> ST(F);
ST.visitAll(S);
@@ -771,8 +760,8 @@ public:
void visitConstant(const SCEVConstant *Numerator) {
if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- APInt NumeratorVal = Numerator->getValue()->getValue();
- APInt DenominatorVal = D->getValue()->getValue();
+ APInt NumeratorVal = Numerator->getAPInt();
+ APInt DenominatorVal = D->getAPInt();
uint32_t NumeratorBW = NumeratorVal.getBitWidth();
uint32_t DenominatorBW = DenominatorVal.getBitWidth();
@@ -792,17 +781,15 @@ public:
void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
const SCEV *StartQ, *StartR, *StepQ, *StepR;
- assert(Numerator->isAffine() && "Numerator should be affine");
+ if (!Numerator->isAffine())
+ return cannotDivide(Numerator);
divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
// Bail out if the types do not match.
Type *Ty = Denominator->getType();
if (Ty != StartQ->getType() || Ty != StartR->getType() ||
- Ty != StepQ->getType() || Ty != StepR->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ Ty != StepQ->getType() || Ty != StepR->getType())
+ return cannotDivide(Numerator);
Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
Numerator->getNoWrapFlags());
Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
@@ -818,11 +805,8 @@ public:
divide(SE, Op, Denominator, &Q, &R);
// Bail out if types do not match.
- if (Ty != Q->getType() || Ty != R->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Q->getType() || Ty != R->getType())
+ return cannotDivide(Numerator);
Qs.push_back(Q);
Rs.push_back(R);
@@ -845,11 +829,8 @@ public:
bool FoundDenominatorTerm = false;
for (const SCEV *Op : Numerator->operands()) {
// Bail out if types do not match.
- if (Ty != Op->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Op->getType())
+ return cannotDivide(Numerator);
if (FoundDenominatorTerm) {
Qs.push_back(Op);
@@ -865,11 +846,8 @@ public:
}
// Bail out if types do not match.
- if (Ty != Q->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Q->getType())
+ return cannotDivide(Numerator);
FoundDenominatorTerm = true;
Qs.push_back(Q);
@@ -884,11 +862,8 @@ public:
return;
}
- if (!isa<SCEVUnknown>(Denominator)) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (!isa<SCEVUnknown>(Denominator))
+ return cannotDivide(Numerator);
// The Remainder is obtained by replacing Denominator by 0 in Numerator.
ValueToValueMap RewriteMap;
@@ -908,15 +883,12 @@ public:
// Quotient is (Numerator - Remainder) divided by Denominator.
const SCEV *Q, *R;
const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
- if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
- // This SCEV does not seem to simplify: fail the division here.
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ // This SCEV does not seem to simplify: fail the division here.
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
+ return cannotDivide(Numerator);
divide(SE, Diff, Denominator, &Q, &R);
- assert(R == Zero &&
- "(Numerator - Remainder) should evenly divide Denominator");
+ if (R != Zero)
+ return cannotDivide(Numerator);
Quotient = Q;
}
@@ -924,11 +896,18 @@ private:
SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
const SCEV *Denominator)
: SE(S), Denominator(Denominator) {
- Zero = SE.getConstant(Denominator->getType(), 0);
- One = SE.getConstant(Denominator->getType(), 1);
+ Zero = SE.getZero(Denominator->getType());
+ One = SE.getOne(Denominator->getType());
+
+ // We generally do not know how to divide Expr by Denominator. We
+ // initialize the division to a "cannot divide" state to simplify the rest
+ // of the code.
+ cannotDivide(Numerator);
+ }
- // By default, we don't know how to divide Expr by Denominator.
- // Providing the default here simplifies the rest of the code.
+ // Convenience function for giving up on the division. We set the quotient to
+ // be equal to zero and the remainder to be equal to the numerator.
+ void cannotDivide(const SCEV *Numerator) {
Quotient = Zero;
Remainder = Numerator;
}
@@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
- Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ for (const SCEV *Op : AddRec->operands())
+ Operands.push_back(getTruncateExpr(Op, Ty));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
@@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// `Step`:
// 1. NSW/NUW flags on the step increment.
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ auto PreStartFlags =
+ ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
@@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
if (OverflowLimit &&
- SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
return PreStart;
- }
+
return nullptr;
}
@@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
if (!StartC)
return false;
- APInt StartAI = StartC->getValue()->getValue();
+ APInt StartAI = StartC->getAPInt();
for (unsigned Delta : {-2, -1, 1, 2}) {
const SCEV *PreStart = getConstant(StartAI - Delta);
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ const auto *PreAR =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+
// Give up if we don't already have the add recurrence we need because
// actually constructing an add recurrence is relatively expensive.
- const SCEVAddRecExpr *PreAR = [&]() {
- FoldingSetNodeID ID;
- ID.AddInteger(scAddRecExpr);
- ID.AddPointer(PreStart);
- ID.AddPointer(Step);
- ID.AddPointer(L);
- void *IP = nullptr;
- return static_cast<SCEVAddRecExpr *>(
- this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- }();
-
if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
@@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
}
}
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
+ if (SA->getNoWrapFlags(SCEV::FlagNUW)) {
+      // If the addition does not overflow in the unsigned sense then we can,
+      // by definition, commute the zero extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getZeroExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNUW);
+ }
+ }
+
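// A worked i8-to-i16 instance of the zext rule above, assuming C++
// unsigned arithmetic models the IR semantics: without wrap the
// extension commutes with the add, with wrap it does not.
#include <cstdint>

static_assert(uint16_t(uint8_t(200 + 40)) == uint16_t(200) + uint16_t(40),
              "zext((200 + 40)<nuw>) == zext(200) + zext(40)");
static_assert(uint16_t(uint8_t(200 + 100)) != uint16_t(200) + uint16_t(100),
              "200 + 100 wraps i8 to 44, so the identity fails");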
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
@@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
}
// sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
- if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
if (SA->getNumOperands() == 2) {
- auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
- auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+ auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+ auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
if (SMul && SC1) {
- if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
- const APInt &C1 = SC1->getValue()->getValue();
- const APInt &C2 = SC2->getValue()->getValue();
+ if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
return getAddExpr(getSignExtendExpr(SC1, Ty),
@@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
}
}
}
+
+ // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
+ if (SA->getNoWrapFlags(SCEV::FlagNSW)) {
+      // If the addition does not overflow in the signed sense then we can,
+      // by definition, commute the sign extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getSignExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNSW);
+ }
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
@@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If Start and Step are constants, check if we can apply this
// transformation:
// sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
- auto SC1 = dyn_cast<SCEVConstant>(Start);
- auto SC2 = dyn_cast<SCEVConstant>(Step);
+ auto *SC1 = dyn_cast<SCEVConstant>(Start);
+ auto *SC2 = dyn_cast<SCEVConstant>(Step);
if (SC1 && SC2) {
- const APInt &C1 = SC1->getValue()->getValue();
- const APInt &C2 = SC2->getValue()->getValue();
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
Start = getSignExtendExpr(Start, Ty);
- const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
- L, AR->getNoWrapFlags());
+ const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
+ AR->getNoWrapFlags());
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
@@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
- if (SC->getValue()->getValue().isNegative())
+ if (SC->getAPInt().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
@@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// Pull a buried constant out to the outside.
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
- AccumulatedConstant += Scale * C->getValue()->getValue();
+ AccumulatedConstant += Scale * C->getAPInt();
}
// Next comes everything else. We're especially interested in multiplies
@@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
- Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
// A multiplication of a constant with another add; recurse.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
@@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// the map.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
const SCEV *Key = SE.getMulExpr(MulOps);
- std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
- M.insert(std::make_pair(Key, NewScale));
+ auto Pair = M.insert(std::make_pair(Key, NewScale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
@@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
return Interesting;
}
-namespace {
- struct APIntCompare {
- bool operator()(const APInt &LHS, const APInt &RHS) const {
- return LHS.ult(RHS);
- }
- };
-}
-
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
const SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags OldFlags) {
+ SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
+ typedef OverflowingBinaryOperator OBO;
bool CanAnalyze =
Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
@@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap =
- ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
+ ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- auto IsKnownNonNegative =
- std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
+ auto IsKnownNonNegative = [&](const SCEV *S) {
+ return SE->isKnownNonNegative(S);
+ };
+
+ if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
+ Flags =
+ ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- if (SignOrUnsignWrap == SCEV::FlagNSW &&
- std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
- return ScalarEvolution::setFlags(OldFlags,
- (SCEV::NoWrapFlags)SignOrUnsignMask);
+ SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
+
+ if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
+ Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
+
+ // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
+ // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
+
+ const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
+ if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
+ auto NSWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap);
+ if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+ }
+ if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
+ auto NUWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C,
+ OBO::NoUnsignedWrap);
+ if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ }
+ }
- return OldFlags;
+ return Flags;
}
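// A sketch of what makeNoWrapRegion encodes for `x + C`, written out for
// 8-bit signed values; this toy is an assumption, not the ConstantRange
// API. x + C stays NSW exactly when x lies in [INT8_MIN - C, INT8_MAX - C],
// computed in a wider type.
#include <cstdint>
#include <utility>

std::pair<int, int> nswRegionForAddI8(int C) { // inclusive bounds for x
  return {INT8_MIN - C, INT8_MAX - C};
}
// For C = 10 this gives [-138, 117]; clipped to the i8 range it means any
// x <= 117 may be tagged <nsw> when 10 is added to it.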
/// getAddExpr - Get a canonical add expression, or something simpler if
@@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVAddExpr operand types don't match!");
#endif
- Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
-
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- Ops[0] = getConstant(LHSC->getValue()->getValue() +
- RHSC->getValue()->getValue());
+ Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
@@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
break;
}
LargeMulOps.push_back(T->getOperand());
- } else if (const SCEVConstant *C =
- dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
@@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops.data(), Ops.size(),
APInt(BitWidth, 1), *this)) {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+
      // Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
- E = NewOps.end(); I != E; ++I)
- MulOpLists[M.find(*I)->second].push_back(*I);
+ for (const SCEV *NewOp : NewOps)
+ MulOpLists[M.find(NewOp)->second].push_back(NewOp);
// Re-generate the operands list.
Ops.clear();
if (AccumulatedConstant != 0)
Ops.push_back(getConstant(AccumulatedConstant));
- for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
- I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
- if (I->first != 0)
- Ops.push_back(getMulExpr(getConstant(I->first),
- getAddExpr(I->second)));
+ for (auto &MulOp : MulOpLists)
+ if (MulOp.first != 0)
+ Ops.push_back(getMulExpr(getConstant(MulOp.first),
+ getAddExpr(MulOp.second)));
if (Ops.empty())
- return getConstant(Ty, 0);
+ return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops);
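// The grouping MulOpLists performs above, sketched on plain integers
// (assuming each pair is {constant scale, operand value}): operands
// sharing a scale are summed first so each scale multiplies only once,
// e.g. 2*a + 2*b + 3*c becomes 2*(a + b) + 3*c.
#include <map>
#include <utility>
#include <vector>

long regroupByScale(const std::vector<std::pair<long, long>> &ScaledOps) {
  std::map<long, long> SumByScale;        // scale -> sum of its operands
  for (const auto &SO : ScaledOps)
    SumByScale[SO.first] += SO.second;
  long Result = 0;
  for (const auto &Entry : SumByScale)    // one multiply per distinct scale
    Result += Entry.first * Entry.second;
  return Result;
}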
@@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
- const SCEV *One = getConstant(Ty, 1);
+ const SCEV *One = getOne(Ty);
const SCEV *AddOne = getAddExpr(One, InnerMul);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
@@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->op_end());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx)
- if (const SCEVAddRecExpr *OtherAddRec =
- dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
if (OtherAddRec->getLoop() == AddRecLoop) {
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
@@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVMulExpr operand types don't match!");
#endif
- Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
-
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- LHSC->getValue()->getValue() *
- RHSC->getValue()->getValue());
+ ConstantInt *Fold =
+ ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
- for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
- E = Add->op_end(); I != E; ++I) {
- const SCEV *Mul = getMulExpr(Ops[0], *I);
+ for (const SCEV *AddOp : Add->operands()) {
+ const SCEV *Mul = getMulExpr(Ops[0], AddOp);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
return getAddExpr(NewOps);
- }
- else if (const SCEVAddRecExpr *
- AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
- for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
- E = AddRec->op_end(); I != E; ++I) {
- Operands.push_back(getMulExpr(Ops[0], *I));
- }
+ for (const SCEV *AddRecOp : AddRec->operands())
+ Operands.push_back(getMulExpr(Ops[0], AddRecOp));
+
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
}
@@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getConstant(Ty, 0);
+ const SCEV *Term = getZero(Ty);
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
@@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// its operands.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
- unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned LZ = RHSC->getAPInt().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
- if (!RHSC->getValue()->getValue().isPowerOf2())
+ if (!RHSC->getAPInt().isPowerOf2())
++MaxShiftAmt;
IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
@@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
- const APInt &StepInt = Step->getValue()->getValue();
- const APInt &DivInt = RHSC->getValue()->getValue();
+ const APInt &StepInt = Step->getAPInt();
+ const APInt &DivInt = RHSC->getAPInt();
if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
- Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
- return getAddRecExpr(Operands, AR->getLoop(),
- SCEV::FlagNW);
+ for (const SCEV *Op : AR->operands())
+ Operands.push_back(getUDivExpr(Op, RHS));
+ return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
}
/// Get a canonical UDivExpr for a recurrence.
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
@@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
- const APInt &StartInt = StartC->getValue()->getValue();
+ const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0)
LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
@@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ for (const SCEV *Op : M->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
@@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ for (const SCEV *Op : A->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
@@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue().abs();
- APInt B = C2->getValue()->getValue().abs();
+ APInt A = C1->getAPInt().abs();
+ APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
@@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
// If the mulexpr multiplies by a constant, then that constant must be the
// first element of the mulexpr.
- if (const SCEVConstant *LHSCst =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
if (LHSCst == RHSCst) {
SmallVector<const SCEV *, 2> Operands;
Operands.append(Mul->op_begin() + 1, Mul->op_end());
@@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
// check.
APInt Factor = gcd(LHSCst, RHSCst);
if (!Factor.isIntN(1)) {
- LHSCst = cast<SCEVConstant>(
- getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
- RHSCst = cast<SCEVConstant>(
- getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+ LHSCst =
+ cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
+ RHSCst =
+ cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
SmallVector<const SCEV *, 2> Operands;
Operands.push_back(LHSCst);
Operands.append(Mul->op_begin() + 1, Mul->op_end());
@@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Canonicalize nested AddRecs in by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
- if (L->contains(NestedLoop) ?
- (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
- (!NestedLoop->contains(L) &&
- DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ if (L->contains(NestedLoop)
+ ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
+ : (!NestedLoop->contains(L) &&
+ DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
// requirement.
- bool AllInvariant = true;
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- if (!isLoopInvariant(Operands[i], L)) {
- AllInvariant = false;
- break;
- }
+ bool AllInvariant = all_of(
+ Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
+
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
@@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
- AllInvariant = true;
- for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
- if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
- AllInvariant = false;
- break;
- }
+ AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
+ return isLoopInvariant(Op, NestedLoop);
+ });
+
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
@@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
- // context.
+ // context. This can be fixed similarly to how these flags are handled for
+ // adds.
SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ const SCEV *TotalOffset = getZero(IntPtrTy);
  // The address space is unimportant. The first thing we do on CurTy is to get
// its element type.
Type *CurTy = PointerType::getUnqual(PointeeType);
@@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- APIntOps::smax(LHSC->getValue()->getValue(),
- RHSC->getValue()->getValue()));
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- APIntOps::umax(LHSC->getValue()->getValue(),
- RHSC->getValue()->getValue()));
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- return getConstant(IntTy,
- F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
@@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
return getConstant(
- IntTy,
- F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
- FieldNo));
+ IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
+ return getDataLayout().getTypeSizeInBits(Ty);
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy()) {
+ if (Ty->isIntegerTy())
return Ty;
- }
  // The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- return F->getParent()->getDataLayout().getIntPtrType(Ty);
+ return getDataLayout().getIntPtrType(Ty);
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
- return &CouldNotCompute;
+ return CouldNotCompute.get();
}
-namespace {
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
// Helper class working with SCEVTraversal to figure out if a SCEV contains
// a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
  // is set iff we find such a SCEVUnknown.
@@ -3300,9 +3302,7 @@ namespace {
}
bool isDone() const { return FindOne; }
};
-}
-bool ScalarEvolution::checkValidity(const SCEV *S) const {
FindInvalidSCEVUnknown F;
SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
ST.visitAll(S);
@@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const {
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+ const SCEV *S = getExistingSCEV(V);
+ if (S == nullptr) {
+ S = createSCEV(V);
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ }
+ return S;
+}
+
+const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
- else
- ValueExprMap.erase(I);
+ ValueExprMap.erase(I);
}
- const SCEV *S = createSCEV(V);
-
- // The process of creating a SCEV for V may have caused other SCEVs
- // to have been created, so it's necessary to insert the new entry
- // from scratch, rather than trying to remember the insert position
- // above.
- ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
- return S;
+ return nullptr;
}
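// The lookup/create split above as a generic memoizer, assuming std::map
// plays the role of ValueExprMap: the insert happens only after compute()
// returns, because computing an entry may recursively create and cache
// other entries, invalidating anything remembered from the first lookup.
#include <map>

struct Memo {
  std::map<int, long> Map;
  const long *existing(int V) const {
    auto It = Map.find(V);
    return It == Map.end() ? nullptr : &It->second;
  }
  long get(int V) {
    if (const long *S = existing(V))
      return *S;
    long Result = compute(V);       // may call get() on sub-values
    Map.emplace(V, Result);         // so insert from scratch afterwards
    return Result;
  }
  long compute(int V) { return V <= 1 ? V : get(V - 1) + get(V - 2); }
};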
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V,
- getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+ return getMulExpr(
+ V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
@@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
- assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
// Fast path: X - X --> 0.
if (LHS == RHS)
- return getConstant(LHS->getType(), 0);
+ return getZero(LHS->getType());
+
+ // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+ // makes it so that we cannot make much use of NUW.
+ auto AddFlags = SCEV::FlagAnyWrap;
+ const bool RHSIsNotMinSigned =
+ !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ // Let M be the minimum representable signed value. Then (-1)*RHS
+ // signed-wraps if and only if RHS is M. That can happen even for
+  // an NSW subtraction because e.g. (-1)*M signed-wraps even though
+ // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+ // (-1)*RHS, we need to prove that RHS != M.
+ //
+ // If LHS is non-negative and we know that LHS - RHS does not
+ // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+ // either by proving that RHS > M or that LHS >= 0.
+ if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+ AddFlags = SCEV::FlagNSW;
+ }
+ }
+
+ // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+ // RHS is NSW and LHS >= 0.
+ //
+ // The difficulty here is that the NSW flag may have been proven
+ // relative to a loop that is to be found in a recurrence in LHS and
+ // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+ // larger scope than intended.
+ auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- // X - Y --> X + -Y.
- // X -(nsw || nuw) Y --> X + -Y.
- return getAddExpr(LHS, getNegativeSCEV(RHS));
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
}
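// The "RHS != M" subtlety above, spot-checked on i8 and assuming `int`
// arithmetic is wide enough to show the wrap: M = -128 is the only i8
// value whose negation leaves the type, so NSW on LHS - RHS transfers to
// LHS + (-1)*RHS only once RHS == M has been ruled out.
#include <cstdint>

static_assert(-1 - (-128) == 127, "-1 - M is representable in i8");
static_assert(-(-128) > INT8_MAX, "but (-1)*M = 128 is not");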
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
return getPointerBase(Cast->getOperand());
- }
- else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
const SCEV *PtrOp = nullptr;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- if ((*I)->getType()->isPointerTy()) {
+ for (const SCEV *NAryOp : NAry->operands()) {
+ if (NAryOp->getType()->isPointerTy()) {
// Cannot find the base of an expression with multiple pointer operands.
if (PtrOp)
return V;
- PtrOp = *I;
+ PtrOp = NAryOp;
}
}
if (!PtrOp)
@@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
if (!Visited.insert(I).second)
continue;
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
+ auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
@@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
}
}
-/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
-/// a loop header, making it a potential recurrence, or it doesn't.
-///
-const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
- if (const Loop *L = LI->getLoopFor(PN->getParent()))
- if (L->getHeader() == PN->getParent()) {
- // The loop may have multiple entrances or multiple exits; we can analyze
- // this phi as an addrec if it has a unique entry value and a unique
- // backedge value.
- Value *BEValueV = nullptr, *StartValueV = nullptr;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (L->contains(PN->getIncomingBlock(i))) {
- if (!BEValueV) {
- BEValueV = V;
- } else if (BEValueV != V) {
- BEValueV = nullptr;
- break;
- }
- } else if (!StartValueV) {
- StartValueV = V;
- } else if (StartValueV != V) {
- StartValueV = nullptr;
- break;
- }
- }
- if (BEValueV && StartValueV) {
- // While we are analyzing this PHI node, handle its value symbolically.
- const SCEV *SymbolicName = getUnknown(PN);
- assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
- "PHI node already processed?");
- ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
-
- // Using this symbolic name for the PHI, analyze the value coming around
- // the back-edge.
- const SCEV *BEValue = getSCEV(BEValueV);
-
- // NOTE: If BEValue is loop invariant, we know that the PHI node just
- // has a special value for the first iteration of the loop.
-
- // If the value coming around the backedge is an add with the symbolic
- // value we just inserted, then we found a simple induction variable!
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
- // If there is a single occurrence of the symbolic value, replace it
- // with a recurrence.
- unsigned FoundIndex = Add->getNumOperands();
- for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (Add->getOperand(i) == SymbolicName)
- if (FoundIndex == e) {
- FoundIndex = i;
- break;
- }
+namespace {
+class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVInitRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
- if (FoundIndex != Add->getNumOperands()) {
- // Create an add with everything but the specified operand.
- SmallVector<const SCEV *, 8> Ops;
- for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (i != FoundIndex)
- Ops.push_back(Add->getOperand(i));
- const SCEV *Accum = getAddExpr(Ops);
-
- // This is not a valid addrec if the step amount is varying each
- // loop iteration, but is not itself an addrec in this loop.
- if (isLoopInvariant(Accum, L) ||
- (isa<SCEVAddRecExpr>(Accum) &&
- cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
-
- // If the increment doesn't overflow, then neither the addrec nor
- // the post-increment will overflow.
- if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
- if (OBO->getOperand(0) == PN) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
- }
- } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
- // If the increment is an inbounds GEP, then we know the address
- // space cannot be wrapped around. We cannot make any guarantee
- // about signed or unsigned overflow because pointers are
- // unsigned but we may have a negative index from the base
- // pointer. We can guarantee that no unsigned wrap occurs if the
- // indices form a positive value.
- if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
- Flags = setFlags(Flags, SCEV::FlagNW);
-
- const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
- if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
- Flags = setFlags(Flags, SCEV::FlagNUW);
- }
+ SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
- // We cannot transfer nuw and nsw flags from subtraction
- // operations -- sub nuw X, Y is not the same as add nuw X, -Y
- // for instance.
- }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
- const SCEV *StartVal = getSCEV(StartValueV);
- const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
- // Since the no-wrap flags are on the increment, they apply to the
- // post-incremented value as well.
- if (isLoopInvariant(Accum, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum),
- Accum, L, Flags);
-
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- ForgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
- return PHISCEV;
- }
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ // Only allow AddRecExprs for this loop.
+ if (Expr->getLoop() == L)
+ return Expr->getStart();
+ Valid = false;
+ return Expr;
+ }
+
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+
+class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVShiftRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
+
+ SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    // Only allow SCEVUnknowns that are loop-invariant with respect to L.
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (Expr->getLoop() == L && Expr->isAffine())
+ return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
+ Valid = false;
+ return Expr;
+ }
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent())
+ return nullptr;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = nullptr;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = nullptr;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
}
- } else if (const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(BEValue)) {
- // Otherwise, this could be a loop like this:
- // i = 0; for (j = 1; ..; ++j) { .... i = j; }
- // In this case, j = {1,+,1} and BEValue is j.
- // Because the other in-value of i (0) fits the evolution of BEValue
- // i really is an addrec evolution.
- if (AddRec->getLoop() == L && AddRec->isAffine()) {
- const SCEV *StartVal = getSCEV(StartValueV);
-
- // If StartVal = j.start - j.stride, we can use StartVal as the
- // initial step of the addrec evolution.
- if (StartVal == getMinusSCEV(AddRec->getOperand(0),
- AddRec->getOperand(1))) {
- // FIXME: For constant StartVal, we should be able to infer
- // no-wrap flags.
- const SCEV *PHISCEV =
- getAddRecExpr(StartVal, AddRec->getOperand(1), L,
- SCEV::FlagAnyWrap);
-
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- ForgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
- return PHISCEV;
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
}
+
+ // We cannot transfer nuw and nsw flags from subtraction
+ // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+ // for instance.
}
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue
+ // i really is an addrec evolution.
+ //
+ // We can generalize this saying that i is the shifted value of BEValue
+ // by one iteration:
+ // PHI(f(0), f({1,+,1})) --> f({0,+,1})
+ const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+ const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+ if (Shifted != getCouldNotCompute() &&
+ Start != getCouldNotCompute()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+ if (Start == StartVal) {
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ return Shifted;
}
}
}
+ }
+
+ return nullptr;
+}
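// The source shape the init/shift rewriters recognize, assuming a C++
// loop stands in for the IR: i is j shifted by one iteration, so
// PHI(f(0), f({1,+,1})) rewrites to f({0,+,1}).
int shiftedPhi(int N) {
  int i = 0;                    // start value: f(0)
  for (int j = 1; j <= N; ++j)  // j = {1,+,1}
    i = j;                      // backedge value: f({1,+,1})
  return i;                     // i observed at the header is {0,+,1}
}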
+
+// Checks if the SCEV S is available at BB. S is considered available at BB
+// if S can be materialized at BB without introducing a fault.
+static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
+ BasicBlock *BB) {
+ struct CheckAvailable {
+ bool TraversalDone = false;
+ bool Available = true;
+
+ const Loop *L = nullptr; // The loop BB is in (can be nullptr)
+ BasicBlock *BB = nullptr;
+ DominatorTree &DT;
+
+ CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
+ : L(L), BB(BB), DT(DT) {}
+
+ bool setUnavailable() {
+ TraversalDone = true;
+ Available = false;
+ return false;
+ }
+
+ bool follow(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
+ case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+        // These expressions are available if all of their operands are.
+ return true;
+
+ case scAddRecExpr: {
+ // We allow add recurrences that are on the loop BB is in, or some
+ // outer loop. This guarantees availability because the value of the
+ // add recurrence at BB is simply the "current" value of the induction
+ // variable. We can relax this in the future; for instance an add
+ // recurrence on a sibling dominating loop is also available at BB.
+ const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
+ if (L && (ARLoop == L || ARLoop->contains(L)))
+ return true;
+
+ return setUnavailable();
+ }
+
+ case scUnknown: {
+ // For SCEVUnknown, we check for simple dominance.
+ const auto *SU = cast<SCEVUnknown>(S);
+ Value *V = SU->getValue();
+
+ if (isa<Argument>(V))
+ return false;
+
+ if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
+ return false;
+
+ return setUnavailable();
+ }
+
+ case scUDivExpr:
+ case scCouldNotCompute:
+        // We do not try to be smart about these at all.
+ return setUnavailable();
+ }
+ llvm_unreachable("switch should be fully covered!");
+ }
+
+ bool isDone() { return TraversalDone; }
+ };
+
+ CheckAvailable CA(L, BB, DT);
+ SCEVTraversal<CheckAvailable> ST(CA);
+
+ ST.visitAll(S);
+ return CA.Available;
+}
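// The early-exit visitor pattern CheckAvailable relies on, sketched over
// a toy expression tree (an assumption, not the SCEVTraversal API): one
// node that proves unavailability decides the whole answer.
#include <vector>

struct ToyExpr {
  int Kind;
  std::vector<ToyExpr> Ops;
};

bool allAvailable(const ToyExpr &E) {
  if (E.Kind < 0)                  // analogous to setUnavailable()
    return false;                  // stop: the traversal is done
  for (const ToyExpr &Op : E.Ops)  // otherwise availability follows
    if (!allAvailable(Op))         // from the operands
      return false;
  return true;
}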
+
+// Try to match a control flow sequence that branches out at BI and merges back
+// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
+// match.
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
+ Value *&C, Value *&LHS, Value *&RHS) {
+ C = BI->getCondition();
+
+ BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
+ BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
+
+ if (!LeftEdge.isSingleEdge())
+ return false;
+
+ assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
+
+ Use &LeftUse = Merge->getOperandUse(0);
+ Use &RightUse = Merge->getOperandUse(1);
+
+ if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
+ LHS = LeftUse;
+ RHS = RightUse;
+ return true;
+ }
+
+ if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
+ LHS = RightUse;
+ RHS = LeftUse;
+ return true;
+ }
+
+ return false;
+}
+
+const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
+ if (PN->getNumIncomingValues() == 2) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+
+ // We don't want to break LCSSA, even in a SCEV expression tree.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
+ return nullptr;
+
+ // Try to match
+ //
+ // br %cond, label %left, label %right
+ // left:
+ // br label %merge
+ // right:
+ // br label %merge
+ // merge:
+ // V = phi [ %x, %left ], [ %y, %right ]
+ //
+ // as "select %cond, %x, %y"
+
+ BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
+ assert(IDom && "At least the entry block should dominate PN");
+
+ auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
+
+ if (BI && BI->isConditional() &&
+ BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
+ IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
+ IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
+ return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
+ }
+
+ return nullptr;
+}
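// The source-level shape of the diamond matched above, assuming this C++
// stands in for the IR: the phi at the merge block is exactly
// `select %cond, %x, %y`.
int mergePhi(bool cond, int x, int y) {
  int v;
  if (cond)   // br %cond, label %left, label %right
    v = x;    // left:  incoming [ %x, %left ]
  else
    v = y;    // right: incoming [ %y, %right ]
  return v;   // merge: V = phi == select %cond, %x, %y
}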
+
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const SCEV *S = createAddRecFromPHI(PN))
+ return S;
+
+ if (const SCEV *S = createNodeFromSelectLikePHI(PN))
+ return S;
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V =
- SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
- if (LI->replacementPreservesLCSSAForm(PN, V))
+ if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
+ if (LI.replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
+const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
+ Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal) {
+ // Handle "constant" branch or select. This can occur for instance when a
+ // loop pass transforms an inner loop and moves on to process the outer loop.
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+
+ // Try to match some simple smax or umax patterns.
+ auto *ICI = dyn_cast<ICmpInst>(Cond);
+ if (!ICI)
+ return getUnknown(I);
+
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return getUnknown(I);
+}
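// A numeric spot-check of the first rewrite above, assuming ordinary ints
// with no overflow: both arms carry the same offset x, so the select
// collapses to a max plus that offset.
#include <algorithm>

int selectForm(int a, int b, int x) { return a > b ? a + x : b + x; }
int smaxForm(int a, int b, int x) { return std::max(a, b) + x; }
// selectForm(3, 7, 10) == smaxForm(3, 7, 10) == 17.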
+
/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
@@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
uint32_t
ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return C->getValue()->getValue().countTrailingZeros();
+ return C->getAPInt().countTrailingZeros();
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
@@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones,
- F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
+ nullptr, &DT);
return Zeros.countTrailingOnes();
}
@@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
/// GetRangeFromMetadata - Helper method to assign a range to V from
/// metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
- ConstantRange TotalRange(
- cast<IntegerType>(I->getType())->getBitWidth(), false);
-
- unsigned NumRanges = MD->getNumOperands() / 2;
- assert(NumRanges >= 1);
-
- for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower =
- mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
- ConstantInt *Upper =
- mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
- ConstantRange Range(Lower->getValue(), Upper->getValue());
- TotalRange = TotalRange.unionWith(Range);
- }
-
- return TotalRange;
- }
- }
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
+ return getConstantRangeFromMetadata(*MD);
return None;
}
@@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S,
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
+ return setRange(C, SignHint, ConstantRange(C->getAPInt()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S,
if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
- ConservativeResult =
- ConservativeResult.intersectWith(
- ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
@@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S,
// Split here to avoid paying the compile-time cost of calling both
// computeKnownBits and ComputeNumSignBits. This restriction can be lifted
// if needed.
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
if (Ones != ~Zeros + 1)
ConservativeResult =
ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S,
return setRange(S, SignHint, ConservativeResult);
}
-/// createSCEV - We know that there is no SCEV for the specified value.
-/// Analyze the expression.
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
+ const BinaryOperator *BinOp = cast<BinaryOperator>(V);
+
+ // Return early if there are no flags to propagate to the SCEV.
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ if (BinOp->hasNoUnsignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (BinOp->hasNoSignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+ if (Flags == SCEV::FlagAnyWrap) {
+ return SCEV::FlagAnyWrap;
+ }
+
+ // Here we check that BinOp is in the header of the innermost loop
+ // containing BinOp, since we only deal with instructions in the loop
+ // header. The actual loop we need to check later will come from an add
+ // recurrence, but getting that requires computing the SCEV of the operands,
+  // which can be expensive. We can do this check cheaply to rule out some
+  // cases early.
+ Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
+ if (innermostContainingLoop == nullptr ||
+ innermostContainingLoop->getHeader() != BinOp->getParent())
+ return SCEV::FlagAnyWrap;
+
+ // Only proceed if we can prove that BinOp does not yield poison.
+ if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
+
+ // At this point we know that if V is executed, then it does not wrap
+ // according to at least one of NSW or NUW. If V is not executed, then we do
+ // not know if the calculation that V represents would wrap. Multiple
+ // instructions can map to the same SCEV. If we apply NSW or NUW from V to
+ // the SCEV, we must guarantee no wrapping for that SCEV also when it is
+ // derived from other instructions that map to the same SCEV. We cannot make
+ // that guarantee for cases where V is not executed. So we need to find the
+ // loop that V is considered in relation to and prove that V is executed for
+ // every iteration of that loop. That implies that the value that V
+ // calculates does not wrap anywhere in the loop, so then we can apply the
+ // flags to the SCEV.
+ //
+ // We check isLoopInvariant to disambiguate in case we are adding two
+ // recurrences from different loops, so that we know which loop to prove
+ // that V is executed in.
+ for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
+ const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ const int OtherOpIndex = 1 - OpIndex;
+ const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
+ if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
+ isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
+ return Flags;
+ }
+ }
+ return SCEV::FlagAnyWrap;
+}
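+
+// An illustrative sketch (assumed IR, not taken from this function) of the
+// header requirement above:
+//
+//   loop:
+//     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+//     %iv.next = add nsw i32 %iv, 1  ; in the header, executes on every
+//                                    ; iteration, so NSW may transfer
+//
+// By contrast, an "add nsw" behind a conditional branch in the loop body may
+// be skipped on exactly the iterations where it would wrap, so its flags
+// cannot be applied to a SCEV shared with other instructions.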
+
+/// createSCEV - We know that there is no SCEV for the specified value. Analyze
+/// the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
@@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// reachable. Such instructions don't matter, and they aren't required
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
- if (!DT->isReachableFromEntry(I->getParent()))
+ if (!DT.isReachableFromEntry(I->getParent()))
return getUnknown(V);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
Opcode = CE->getOpcode();
else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
- return getConstant(V->getType(), 0);
+ return getZero(V->getType());
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
@@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
- //
- // Don't apply this instruction's NSW or NUW flags to the new
- // expression. The instruction may be guarded by control flow that the
- // no-wrap behavior depends on. Non-control-equivalent instructions can be
- // mapped to the same SCEV expression, and it would be incorrect to transfer
- // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
- AddOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
- unsigned Opcode = Op->getValueID() - Value::InstructionVal;
- if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ unsigned Opcode = U ? U->getOpcode() : 0;
+ if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
+ assert(Op != V && "V should be an add");
+ AddOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ AddOps.push_back(OpSCEV);
+ break;
+ }
+
+ // If a NUW or NSW flag can be applied to the SCEV for this
+ // addition, then compute the SCEV for this addition by itself
+ // with a separate call to getAddExpr. We need to do that
+ // instead of pushing the operands of the addition onto AddOps,
+ // since the flags are only known to apply to this particular
+ // addition - they may not apply to other additions that can be
+ // formed with operands from AddOps.
+ const SCEV *RHS = getSCEV(U->getOperand(1));
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+ else
+ AddOps.push_back(getAddExpr(LHS, RHS, Flags));
break;
- U = cast<Operator>(Op);
- const SCEV *Op1 = getSCEV(U->getOperand(1));
+ }
+
if (Opcode == Instruction::Sub)
- AddOps.push_back(getNegativeSCEV(Op1));
+ AddOps.push_back(getNegativeSCEV(RHS));
else
- AddOps.push_back(Op1);
+ AddOps.push_back(RHS);
}
- AddOps.push_back(getSCEV(U->getOperand(0)));
return getAddExpr(AddOps);
}
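// A sketch of the Add traversal above, assuming IR of the shape
//
//   %t0 = add i32 %a, %b
//   %t1 = add i32 %t0, %c
//   %t2 = add i32 %t1, %d
//
// Starting from %t2, the loop walks the left operands, so (with no cached
// SCEVs and no usable no-wrap flags) AddOps collects the SCEVs of %d, %c,
// %b and %a, and one getAddExpr call builds the sum instead of N-1 nested
// calls.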
+
case Instruction::Mul: {
- // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
- MulOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0);
- Op->getValueID() == Instruction::Mul + Value::InstructionVal;
- Op = U->getOperand(0)) {
- U = cast<Operator>(Op);
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ if (!U || U->getOpcode() != Instruction::Mul) {
+ assert(Op != V && "V should be a mul");
+ MulOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ MulOps.push_back(OpSCEV);
+ break;
+ }
+
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+
MulOps.push_back(getSCEV(U->getOperand(1)));
}
- MulOps.push_back(getSCEV(U->getOperand(0)));
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Sub:
- return getMinusSCEV(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+ getNoWrapFlagsFromUB(U));
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
@@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
- F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(),
+ 0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (SA->getValue().uge(BitWidth))
break;
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
}
break;
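// To illustrate the guard above (a hedged sketch): "shl nsw i8 %x, 3"
// becomes a multiply by i8 8 and may keep NSW, subject to the other
// getNoWrapFlagsFromUB checks, while "shl nsw i8 %x, 7" would multiply by
// i8 -128, the unresolved BitWidth - 1 case, so Flags stays FlagAnyWrap.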
@@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
- // This could be a smax or umax that was lowered earlier.
- // Try to recover it.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
- switch (ICI->getPredicate()) {
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- std::swap(LHS, RHS);
- // fall through
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- // a >s b ? a+x : b+x -> smax(a, b)+x
- // a >s b ? b+x : a+x -> smin(a, b)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType())) {
- const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
- const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, RS);
- if (LDiff == RDiff)
- return getAddExpr(getSMaxExpr(LS, RS), LDiff);
- LDiff = getMinusSCEV(LA, RS);
- RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getSMinExpr(LS, RS), LDiff);
- }
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- std::swap(LHS, RHS);
- // fall through
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- // a >u b ? a+x : b+x -> umax(a, b)+x
- // a >u b ? b+x : a+x -> umin(a, b)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType())) {
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, RS);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, RS), LDiff);
- LDiff = getMinusSCEV(LA, RS);
- RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getUMinExpr(LS, RS), LDiff);
- }
- break;
- case ICmpInst::ICMP_NE:
- // n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType()) &&
- isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(U->getType(), 1);
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, One);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
- }
- break;
- case ICmpInst::ICMP_EQ:
- // n == 0 ? 1+x : n+x -> umax(n, 1)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType()) &&
- isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(U->getType(), 1);
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, One);
- const SCEV *RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
- }
- break;
- default:
- break;
- }
- }
+    // U can also be a select constant expr, which we let fall through. Since
+    // createNodeForSelectOrPHI only works for a condition that is an
+    // `ICmpInst`, and constant expressions cannot have instructions as
+    // operands, we'd have returned getUnknown for a select constant
+    // expression anyway.
+ if (isa<Instruction>(U))
+ return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
+ U->getOperand(1), U->getOperand(2));
default: // We cannot analyze this expression.
break;
@@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
return 1;
// Get the trip count from the BE count by adding 1.
- const SCEV *TCMul = getAddExpr(ExitCount,
- getConstant(ExitCount->getType(), 1));
+ const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
// FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
// to factor simple cases.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
@@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
if (!Pair.second)
return Pair.first->second;
- // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // computeBackedgeTakenCount may allocate memory for its result. Inserting it
// into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
// must be cleared in this scope.
- BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+ BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
if (Result.getExact(this) != getCouldNotCompute()) {
assert(isLoopInvariant(Result.getExact(this), L) &&
@@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
}
// Re-lookup the insert position, since the call to
- // ComputeBackedgeTakenCount above could result in a
+ // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
@@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) {
}
/// getExact - Get the exact loop backedge taken count considering all loop
-/// exits. A computable result can only be return for loops with a single exit.
-/// Returning the minimum taken count among all exits is incorrect because one
-/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
-/// the limit of each loop test is never skipped. This is a valid assumption as
-/// long as the loop exits via that test. For precise results, it is the
-/// caller's responsibility to specify the relevant loop exit using
+/// exits. A computable result can only be returned for loops with a single
+/// exit. Returning the minimum taken count among all exits is incorrect
+/// because one of the loop's exit limits may have been skipped. HowFarToZero
+/// assumes that the limit of each loop test is never skipped. This is a valid
+/// assumption as long as the loop exits via that test. For precise results, it
+/// is the caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
@@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() {
delete[] ExitNotTaken.getNextExit();
}
-/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// computeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// and compute maxBECount.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
- ExitLimit EL = ComputeExitLimit(L, ExitBB);
+ ExitLimit EL = computeExitLimit(L, ExitBB);
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
@@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
// considered greater than any computable EL.Max.
if (EL.Max != getCouldNotCompute() && Latch &&
- DT->dominates(ExitBB, Latch)) {
+ DT.dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount)
MustExitMaxBECount = EL.Max;
else {
@@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
-/// ComputeExitLimit - Compute the number of times the backedge of the specified
-/// loop will execute if it exits via the specified block.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
- // Okay, we've chosen an exiting block. See what condition causes us to
- // exit at this block and remember the exit block and whether all other targets
+ // Okay, we've chosen an exiting block. See what condition causes us to exit
+ // at this block and remember the exit block and whether all other targets
// lead to the loop header.
bool MustExecuteLoopHeader = true;
BasicBlock *Exit = nullptr;
@@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
if (!Pred)
return getCouldNotCompute();
TerminatorInst *PredTerm = Pred->getTerminator();
- for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
- BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ for (const BasicBlock *PredSucc : PredTerm->successors()) {
if (PredSucc == BB)
continue;
// If the predecessor has a successor that isn't BB and isn't
@@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
// Proceed to the next level to examine the exit condition expression.
- return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+ return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
BI->getSuccessor(1),
/*ControlsExit=*/IsOnlyExit);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
- return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+ return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
/*ControlsExit=*/IsOnlyExit);
return getCouldNotCompute();
}
-/// ComputeExitLimitFromCond - Compute the number of times the
+/// computeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
///
@@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
/// condition is true and can infer that failing to meet the condition prior to
/// integer wraparound results in undefined behavior.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+ScalarEvolution::computeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
@@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
ControlsExit && !EitherMayExit);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
@@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
ControlsExit && !EitherMayExit);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
@@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+ return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
return getCouldNotCompute();
else
// The backedge is never taken.
- return getConstant(CI->getType(), 0);
+ return getZero(CI->getType());
}
// If it's not an integer or pointer comparison then compute it the hard way.
- return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
-/// ComputeExitLimitFromICmp - Compute the number of times the
-/// backedge of the specified loop will execute if its exit condition
-/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
@@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
- ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
+ computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
+ ExitLimit ShiftEL = computeShiftCompareExitLimit(
+ ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond);
+ if (ShiftEL.hasAnyInfo())
+ return ShiftEL;
+
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
@@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange(
- ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+ ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
@@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
break;
}
default:
-#if 0
- dbgs() << "ComputeBackedgeTakenCount ";
- if (ExitCond->getOperand(0)->getType()->isUnsigned())
- dbgs() << "[unsigned] ";
- dbgs() << *LHS << " "
- << Instruction::getOpcodeName(Instruction::ICmp)
- << " " << *RHS << "\n";
-#endif
break;
}
- return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
bool ControlsExit) {
@@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
+/// computeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+ScalarEvolution::computeLoadConstantCompareExitLimit(
LoadInst *LI,
Constant *RHS,
const Loop *L,
@@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
Result = ConstantExpr::getICmp(predicate, Result, RHS);
if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
-#if 0
- dbgs() << "\n***\n*** Computed loop count " << *ItCst
- << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
- << "***\n";
-#endif
++NumArrayLenItCounts;
return getConstant(ItCst); // Found terminating iteration!
}
@@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
return getCouldNotCompute();
}
+ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
+ Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
+ ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
+ if (!RHS)
+ return getCouldNotCompute();
+
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return getCouldNotCompute();
+
+ const BasicBlock *Predecessor = L->getLoopPredecessor();
+ if (!Predecessor)
+ return getCouldNotCompute();
+
+ // Return true if V is of the form "LHS `shift_op` <positive constant>".
+  // Return LHS in OutLHS and shift_op in OutOpCode.
+ auto MatchPositiveShift =
+ [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
+
+ using namespace PatternMatch;
+
+ ConstantInt *ShiftAmt;
+ if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::LShr;
+ else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::AShr;
+ else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::Shl;
+ else
+ return false;
+
+ return ShiftAmt->getValue().isStrictlyPositive();
+ };
+
+ // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
+ //
+ // loop:
+ // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
+ // %iv.shifted = lshr i32 %iv, <positive constant>
+ //
+  // Return true on a successful match. Return the corresponding PHI node (%iv
+ // above) in PNOut and the opcode of the shift operation in OpCodeOut.
+ auto MatchShiftRecurrence =
+ [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
+ Optional<Instruction::BinaryOps> PostShiftOpCode;
+
+ {
+ Instruction::BinaryOps OpC;
+ Value *V;
+
+ // If we encounter a shift instruction, "peel off" the shift operation,
+ // and remember that we did so. Later when we inspect %iv's backedge
+ // value, we will make sure that the backedge value uses the same
+ // operation.
+ //
+ // Note: the peeled shift operation does not have to be the same
+ // instruction as the one feeding into the PHI's backedge value. We only
+ // really care about it being the same *kind* of shift instruction --
+ // that's all that is required for our later inferences to hold.
+ if (MatchPositiveShift(LHS, V, OpC)) {
+ PostShiftOpCode = OpC;
+ LHS = V;
+ }
+ }
+
+ PNOut = dyn_cast<PHINode>(LHS);
+ if (!PNOut || PNOut->getParent() != L->getHeader())
+ return false;
+
+ Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
+ Value *OpLHS;
+
+ return
+ // The backedge value for the PHI node must be a shift by a positive
+ // amount
+ MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
+
+ // of the PHI node itself
+ OpLHS == PNOut &&
+
+        // and the kind of shift should match the kind of shift we peeled
+ // off, if any.
+ (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
+ };
+
+ PHINode *PN;
+ Instruction::BinaryOps OpCode;
+ if (!MatchShiftRecurrence(LHS, PN, OpCode))
+ return getCouldNotCompute();
+
+ const DataLayout &DL = getDataLayout();
+
+ // The key rationale for this optimization is that for some kinds of shift
+ // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
+ // within a finite number of iterations. If the condition guarding the
+ // backedge (in the sense that the backedge is taken if the condition is true)
+ // is false for the value the shift recurrence stabilizes to, then we know
+ // that the backedge is taken only a finite number of times.
+
+ ConstantInt *StableValue = nullptr;
+ switch (OpCode) {
+ default:
+ llvm_unreachable("Impossible case!");
+
+ case Instruction::AShr: {
+ // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
+ // bitwidth(K) iterations.
+ Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
+ bool KnownZero, KnownOne;
+ ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
+ Predecessor->getTerminator(), &DT);
+ auto *Ty = cast<IntegerType>(RHS->getType());
+ if (KnownZero)
+ StableValue = ConstantInt::get(Ty, 0);
+ else if (KnownOne)
+ StableValue = ConstantInt::get(Ty, -1, true);
+ else
+ return getCouldNotCompute();
+
+ break;
+ }
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
+ // stabilize to 0 in at most bitwidth(K) iterations.
+ StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
+ break;
+ }
+
+ auto *Result =
+ ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
+ assert(Result->getType()->isIntegerTy(1) &&
+ "Otherwise cannot be an operand to a branch instruction");
+
+ if (Result->isZeroValue()) {
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *UpperBound =
+ getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
+ return ExitLimit(getCouldNotCompute(), UpperBound);
+ }
+
+ return getCouldNotCompute();
+}
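+
+// A worked sketch of the above, assuming this exact IR shape:
+//
+//   loop:
+//     %iv = phi i32 [ %val, %preheader ], [ %iv.shifted, %loop ]
+//     %iv.shifted = lshr i32 %iv, 1
+//     %cmp = icmp ne i32 %iv.shifted, 0
+//     br i1 %cmp, label %loop, label %exit
+//
+// The recurrence stabilizes to 0, "icmp ne i32 0, 0" constant-folds to
+// false, so the backedge condition must eventually fail; the exact count
+// stays CouldNotCompute but the max is bounded by bitwidth(%iv) = 32.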
/// CanConstantFold - Return true if we can constant fold an instruction of the
/// specified type, assuming that all operands were constants.
@@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = nullptr;
- for (Instruction::op_iterator OpI = UseInst->op_begin(),
- OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
-
- if (isa<Constant>(*OpI)) continue;
+ for (Value *Op : UseInst->operands()) {
+ if (isa<Constant>(Op)) continue;
- Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
PHINode *P = dyn_cast<PHINode>(OpInst);
@@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !canConstantEvolve(I, L)) return nullptr;
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (PHINode *PN = dyn_cast<PHINode>(I))
return PN;
- }
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
@@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
TLI);
}
+
+// If every incoming value to PN except the one for BB is a specific Constant,
+// return that, else return nullptr.
+static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
+ Constant *IncomingVal = nullptr;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ continue;
+
+ auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (!CurrentVal)
+ return nullptr;
+
+ if (IncomingVal != CurrentVal) {
+ if (IncomingVal)
+ return nullptr;
+ IncomingVal = CurrentVal;
+ }
+ }
+
+ return IncomingVal;
+}
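+
+// E.g. for "%p = phi i32 [ 7, %a ], [ 7, %b ], [ %x, %latch ]" and
+// BB == %latch this returns i32 7; mixed constants (say 7 and 8), or a
+// non-constant value in any slot other than BB's, yield nullptr.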
+
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
@@ -5462,8 +5946,7 @@ Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- DenseMap<PHINode*, Constant*>::const_iterator I =
- ConstantEvolutionLoopExitValue.find(PN);
+ auto I = ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
@@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
- // Since the loop is canonicalized, the PHI node must have two entries. One
- // entry must be a constant (coming in from outside of the loop), and the
- // second must be derived from the same PHI.
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = nullptr;
- for (BasicBlock::iterator I = Header->begin();
- (PHI = dyn_cast<PHINode>(I)); ++I) {
- Constant *StartCST =
- dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return nullptr;
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI) break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
- Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ Value *BEValue = PN->getIncomingValueForBlock(Latch);
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
@@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
@@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI =
- EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// cease to be able to evaluate one of them or if they stop evolving,
// because that doesn't necessarily prevent us from computing PN.
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
- for (DenseMap<Instruction *, Constant *>::const_iterator
- I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
- PHINode *PHI = dyn_cast<PHINode>(I->first);
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
- PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ PHIsToCompute.emplace_back(PHI, I.second);
}
// We use two distinct loops because EvaluateExpression may invalidate any
// iterators into CurrentIterVals.
- for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
- I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
- PHINode *PHI = I->first;
+ for (const auto &I : PHIsToCompute) {
+ PHINode *PHI = I.first;
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
- Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
- if (NextPHI != I->second)
+ if (NextPHI != I.second)
StoppedEvolving = false;
}
@@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
}
}
-/// ComputeExitCountExhaustively - If the loop is known to execute a
-/// constant number of times (the condition evolves only from constants),
-/// try to evaluate a few iterations of the loop until we get the exit
-/// condition gets a value of ExitWhen (true or false). If we cannot
-/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
@@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
- // One entry must be a constant (coming in from outside of the loop), and the
- // second must be derived from the same PHI.
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = nullptr;
- for (BasicBlock::iterator I = Header->begin();
- (PHI = dyn_cast<PHINode>(I)); ++I) {
- Constant *StartCST =
- dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Should follow from NumIncomingValues == 2!");
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
@@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
- ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
- EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
+ auto *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// calling EvaluateExpression on them because that may invalidate iterators
// into CurrentIterVals.
SmallVector<PHINode *, 8> PHIsToCompute;
- for (DenseMap<Instruction *, Constant *>::const_iterator
- I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
- PHINode *PHI = dyn_cast<PHINode>(I->first);
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(PHI);
}
- for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
- E = PHIsToCompute.end(); I != E; ++I) {
- PHINode *PHI = *I;
+ for (PHINode *PHI : PHIsToCompute) {
Constant *&NextPHI = NextIterVals[PHI];
if (NextPHI) continue; // Already computed!
- Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
CurrentIterVals.swap(NextIterVals);
}
@@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+ ValuesAtScopes[V];
// Check to see if we've folded this expression at this loop before.
- SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == L)
- return Values[u].second ? Values[u].second : V;
- }
- Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
+ for (auto &LS : Values)
+ if (LS.first == L)
+ return LS.second ? LS.second : V;
+
+ Values.emplace_back(L, nullptr);
+
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == L) {
- Values2[u - 1].second = C;
+ for (auto &LS : reverse(ValuesAtScopes[V]))
+ if (LS.first == L) {
+ LS.second = C;
break;
}
- }
return C;
}
@@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
- const Loop *LI = (*this->LI)[I->getParent()];
+ const Loop *LI = this->LI[I->getParent()];
if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
if (PHINode *PN = dyn_cast<PHINode>(I))
if (PN->getParent() == LI->getHeader()) {
@@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
- Constant *RV = getConstantEvolutionLoopExitValue(PN,
- BTCC->getValue()->getValue(),
- LI);
+ Constant *RV =
+ getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
if (RV) return getSCEV(RV);
}
}
@@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (CanConstantFold(I)) {
SmallVector<Constant *, 4> Operands;
bool MadeImprovement = false;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *Op = I->getOperand(i);
+ for (Value *Op : I->operands()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
continue;
@@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], DL, TLI);
+ Operands[1], DL, &TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
} else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
- DL, TLI);
+ DL, &TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
return std::make_pair(CNC, CNC);
}
- uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
- const APInt &L = LC->getValue()->getValue();
- const APInt &M = MC->getValue()->getValue();
- const APInt &N = NC->getValue()->getValue();
+ uint32_t BitWidth = LC->getAPInt().getBitWidth();
+ const APInt &L = LC->getAPInt();
+ const APInt &M = MC->getAPInt();
+ const APInt &N = NC->getAPInt();
APInt Two(BitWidth, 2);
APInt Four(BitWidth, 4);
@@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1 && R2) {
-#if 0
- dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
- << " sol#2: " << *R2 << "\n";
-#endif
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
@@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
- bool CountDown = StepC->getValue()->getValue().isNegative();
+ bool CountDown = StepC->getAPInt().isNegative();
const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wraparound.
@@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// done by counting and comparing the number of trailing zeros of Step and
// Distance.
if (!CountDown) {
- const APInt &StepV = StepC->getValue()->getValue();
+ const APInt &StepV = StepC->getAPInt();
    // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
// also returns true if StepV is maximally negative (eg, INT_MIN), but that
// case is not handled as this code is guarded by !CountDown.
if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
- return getUDivExactExpr(Distance, Step);
+ GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
+ // Here we've constrained the equation to be of the form
+ //
+ // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
+ //
+ // where we're operating on a W bit wide integer domain and k is
+ // non-negative. The smallest unsigned solution for X is the trip count.
+ //
+ // (0) is equivalent to:
+ //
+ // 2^(N + k) * Distance' - 2^N * X = L * 2^W
+ // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
+ // <=> 2^k * Distance' - X = L * 2^(W - N)
+ // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
+ //
+      // The smallest X satisfying (1) is the unsigned remainder of dividing
+      // the LHS
+ // by 2^(W - N).
+ //
+ // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
+ //
+ // E.g. say we're solving
+ //
+ // 2 * Val = 2 * X (in i8) ... (3)
+ //
+ // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
+ //
+ // Note: It is tempting to solve (3) by setting X = Val, but Val is not
+ // necessarily the smallest unsigned value of X that satisfies (3).
+ // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
+      // is i8 1, not i8 -127.
+
+ const auto *ModuloResult = getUDivExactExpr(Distance, Step);
+
+ // Since SCEV does not have a URem node, we construct one using a truncate
+ // and a zero extend.
+
+ unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
+ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
+ auto *WideTy = Distance->getType();
+
+ return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+ }
}
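// A numeric sketch of the construction above: to solve 4 * X = 8 (mod 2^8),
// i.e. Step = 4 (N = 2, W = 8) and Distance = 8, getUDivExactExpr yields
// 8 /u 4 = 2; truncating to W - N = 6 bits and zero-extending back realizes
// the "URem by 2^(W - N)" from (2), for a trip count of 2.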
// If the condition controls loop exit (the loop exits only if the expression
@@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
- return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
- -StartC->getValue()->getValue(),
+ return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
*this);
return getCouldNotCompute();
}
@@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isNullValue())
- return getConstant(C->getType(), 0);
+ return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
@@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
- if (Loop *L = LI->getLoopFor(BB))
+ if (Loop *L = LI.getLoopFor(BB))
return std::make_pair(L->getLoopPredecessor(), L->getHeader());
return std::pair<BasicBlock *, BasicBlock *>();
@@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
+ auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
+ // Not all instructions that are "identical" compute the same value. For
+ // instance, two distinct alloca instructions allocating the same type are
+    // identical and do not read memory, but compute distinct values.
+    return A->isIdenticalTo(B) &&
+           (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
+ };
+
// Otherwise, if they're both SCEVUnknown, it's possible that they hold
// two different instructions with the same value. Check for this case.
if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
- if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ if (ComputesEqualValues(AI, BI))
return true;
// Otherwise assume they may have a different value.
@@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// If there's a constant operand, canonicalize comparisons with boundary
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &RA = RC->getAPInt();
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
Pred = ICmpInst::ICMP_ULT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
- LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
- SCEV::FlagNUW);
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
- RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
- SCEV::FlagNUW);
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
@@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
if (LeftGuarded && RightGuarded)
return true;
+ if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
+ return true;
+
// Otherwise see what can be done with known constant ranges.
return isKnownPredicateWithRanges(Pred, LHS, RHS);
}
+bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+ bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
+
+#ifndef NDEBUG
+ // Verify an invariant: inverting the predicate should turn a monotonically
+ // increasing change to a monotonically decreasing one, and vice versa.
+ bool IncreasingSwapped;
+ bool ResultSwapped = isMonotonicPredicateImpl(
+ LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
+
+ assert(Result == ResultSwapped && "should be able to analyze both!");
+ if (ResultSwapped)
+ assert(Increasing == !IncreasingSwapped &&
+ "monotonicity should flip as we flip the predicate");
+#endif
+
+ return Result;
+}
+
+bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+
+ // A zero step value for LHS means the induction variable is essentially a
+ // loop invariant value. We don't really depend on the predicate actually
+ // flipping from false to true (for increasing predicates, and the other way
+ // around for decreasing predicates), all we care about is that *if* the
+ // predicate changes then it only changes from false to true.
+ //
+ // A zero step value in itself is not very useful, but there may be places
+ // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
+ // as general as possible.
+
+ switch (Pred) {
+ default:
+ return false; // Conservative answer
+
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (!LHS->getNoWrapFlags(SCEV::FlagNUW))
+ return false;
+
+ Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
+ return true;
+
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!LHS->getNoWrapFlags(SCEV::FlagNSW))
+ return false;
+
+ const SCEV *Step = LHS->getStepRecurrence(*this);
+
+ if (isKnownNonNegative(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
+ return true;
+ }
+
+ if (isKnownNonPositive(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
+ return true;
+ }
+
+ return false;
+ }
+
+ }
+
+ llvm_unreachable("switch has default clause!");
+}
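+
+// For instance, with LHS = {0,+,1}<nuw>, ICMP_UGE against a fixed bound is
+// monotonically increasing (it can only flip from false to true as the IV
+// grows) while ICMP_ULT is monotonically decreasing; the NDEBUG check in
+// the wrapper above verifies exactly this symmetry.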
+
+bool ScalarEvolution::isLoopInvariantPredicate(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+ const SCEV *&InvariantRHS) {
+
+ // If there is a loop-invariant, force it into the RHS, otherwise bail out.
+ if (!isLoopInvariant(RHS, L)) {
+ if (!isLoopInvariant(LHS, L))
+ return false;
+
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!ArLHS || ArLHS->getLoop() != L)
+ return false;
+
+ bool Increasing;
+ if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
+ return false;
+
+ // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
+ // true as the loop iterates, and the backedge is control dependent on
+ // "ArLHS `Pred` RHS" == true then we can reason as follows:
+ //
+ // * if the predicate was false in the first iteration then the predicate
+ // is never evaluated again, since the loop exits without taking the
+ // backedge.
+ // * if the predicate was true in the first iteration then it will
+ // continue to be true for all future iterations since it is
+ // monotonically increasing.
+ //
+ // For both the above possibilities, we can replace the loop varying
+ // predicate with its value on the first iteration of the loop (which is
+ // loop invariant).
+ //
+ // A similar reasoning applies for a monotonically decreasing predicate, by
+ // replacing true with false and false with true in the above two bullets.
+
+ auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
+
+ if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
+ return false;
+
+ InvariantPred = Pred;
+ InvariantLHS = ArLHS->getStart();
+ InvariantRHS = RHS;
+ return true;
+}
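+
+// A hedged example: with ArLHS = {0,+,1}<nuw>, Pred = ICMP_UGE and a loop
+// invariant %n, the predicate can only flip from false to true. If, in
+// addition, the backedge is taken only when "%iv uge %n" holds, the loop
+// varying check may be replaced by its first-iteration value "0 uge %n".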
+
bool
ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
@@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
return false;
}
+bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+
+ // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
+ // Return Y via OutY.
+ auto MatchBinaryAddToConst =
+ [this](const SCEV *Result, const SCEV *X, APInt &OutY,
+ SCEV::NoWrapFlags ExpectedFlags) {
+ const SCEV *NonConstOp, *ConstOp;
+ SCEV::NoWrapFlags FlagsPresent;
+
+ if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
+ !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
+ return false;
+
+ OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
+ return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
+ };
+
+ APInt C;
+
+ switch (Pred) {
+ default:
+ break;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE:
+ // X s<= (X + C)<nsw> if C >= 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
+ return true;
+
+ // (X + C)<nsw> s<= X if C <= 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
+ !C.isStrictlyPositive())
+ return true;
+ break;
+
+ case ICmpInst::ICMP_SGT:
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT:
+ // X s< (X + C)<nsw> if C > 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
+ C.isStrictlyPositive())
+ return true;
+
+ // (X + C)<nsw> s< X if C < 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
+ return true;
+ break;
+ }
+
+ return false;
+}
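+
+// E.g. given LHS = %n and RHS = (%n + 2)<nsw>, MatchBinaryAddToConst
+// extracts C = 2, which is non-negative, proving "%n s<= (%n + 2)<nsw>";
+// symmetrically, "(%n + (-1))<nsw> s<= %n" follows because C = -1 is not
+// strictly positive.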
+
+bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+ if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
+ return false;
+
+  // Allowing an arbitrary number of activations of
+  // isKnownPredicateViaSplitting on the stack can result in exponential time
+  // complexity.
+ SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
+
+ // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
+ //
+ // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
+ // isKnownPredicate. isKnownPredicate is more powerful, but also more
+ // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
+ // interesting cases seen in practice. We can consider "upgrading" L >= 0 to
+ // use isKnownPredicate later if needed.
+ return isKnownNonNegative(RHS) &&
+ isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
+ isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
+}
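+
+// In other words, a sketch of the split: to establish "%i u< %len" it
+// suffices that %len is known non-negative, that %i s>= 0, and that
+// %i s< %len; for a non-negative %len the unsigned fact and the signed
+// pair of facts coincide.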
+
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
/// eliminate casts.
@@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
return true;
+ // We don't want more than one activation of the following loops on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
+
+ // See if we can exploit a trip count to prove the predicate.
+ const auto &BETakenInfo = getBackedgeTakenInfo(L);
+ const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
+ if (LatchBECount != getCouldNotCompute()) {
+ // We know that Latch branches back to the loop header exactly
+    // LatchBECount times. This means the backedge condition at Latch is
+ // equivalent to "{0,+,1} u< LatchBECount".
+ Type *Ty = LatchBECount->getType();
+ auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
+ const SCEV *LoopCounter =
+ getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
+ if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
+ LatchBECount))
+ return true;
+ }
+
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &AssumeVH : AC->assumptions()) {
+ for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
- if (!DT->dominates(CI, Latch->getTerminator()))
+ if (!DT.dominates(CI, Latch->getTerminator()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
return true;
}
- struct ClearWalkingBEDominatingCondsOnExit {
- ScalarEvolution &SE;
-
- explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
- : SE(SE){};
-
- ~ClearWalkingBEDominatingCondsOnExit() {
- SE.WalkingBEDominatingConds = false;
- }
- };
-
- // We don't want more than one activation of the following loop on the stack
- // -- that can lead to O(n!) time complexity.
- if (WalkingBEDominatingConds)
- return false;
-
- WalkingBEDominatingConds = true;
- ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
-
// If the loop is not reachable from the entry block, we risk running into an
// infinite loop as we walk up into the dom tree. These loops do not matter
// anyway, so we just return a conservative answer when we see them.
- if (!DT->isReachableFromEntry(L->getHeader()))
+ if (!DT.isReachableFromEntry(L->getHeader()))
return false;
- for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
- DTN != HeaderDTN;
- DTN = DTN->getIDom()) {
+ for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+ DTN != HeaderDTN; DTN = DTN->getIDom()) {
assert(DTN && "should reach the loop header before reaching the root!");
@@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// We're constructively (and conservatively) enumerating edges within the
// loop body that dominate the latch. The dominator tree better agree
// with us on this:
- assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+ assert(DT.dominates(DominatingEdge, Latch) && "should be!");
if (isImpliedCond(Pred, LHS, RHS, Condition,
BB != ContinuePredicate->getSuccessor(0)))
@@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
}
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &AssumeVH : AC->assumptions()) {
+ for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
- if (!DT->dominates(CI, L->getHeader()))
+ if (!DT.dominates(CI, L->getHeader()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
@@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
+namespace {
/// RAII wrapper to prevent recursive application of isImpliedCond.
/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
/// currently evaluating isImpliedCond.
@@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate {
LoopPreds.erase(Cond);
}
};
+} // end anonymous namespace
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
@@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ ICmpInst::Predicate FoundPred,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
@@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
RHS, LHS, FoundLHS, FoundRHS);
}
+  // Unsigned comparison is the same as signed comparison when both operands
+  // are non-negative.
+ if (CmpInst::isUnsigned(FoundPred) &&
+ CmpInst::getSignedPredicate(FoundPred) == Pred &&
+ isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
(isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
@@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
APInt Min = ICmpInst::isSigned(Pred) ?
getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
- if (Min == C->getValue()->getValue()) {
+ if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
// This is true even if (Min + 1) wraps around -- in case of
// wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
@@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
return false;
}
+bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
+ const SCEV *&L, const SCEV *&R,
+ SCEV::NoWrapFlags &Flags) {
+ const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
+ if (!AE || AE->getNumOperands() != 2)
+ return false;
+
+ L = AE->getOperand(0);
+ R = AE->getOperand(1);
+ Flags = AE->getNoWrapFlags();
+ return true;
+}
+
+bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
+ const SCEV *More,
+ APInt &C) {
+ // We avoid subtracting expressions here because this function is usually
+ // fairly deep in the call stack (i.e. is called many times).
+
+ if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
+ const auto *LAR = cast<SCEVAddRecExpr>(Less);
+ const auto *MAR = cast<SCEVAddRecExpr>(More);
+
+ if (LAR->getLoop() != MAR->getLoop())
+ return false;
+
+    // We look at affine expressions only -- not for correctness but to keep
+ // getStepRecurrence cheap.
+ if (!LAR->isAffine() || !MAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
+ return false;
+
+ Less = LAR->getStart();
+ More = MAR->getStart();
+
+ // fall through
+ }
+
+ if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
+ const auto &M = cast<SCEVConstant>(More)->getAPInt();
+ const auto &L = cast<SCEVConstant>(Less)->getAPInt();
+ C = M - L;
+ return true;
+ }
+
+ const SCEV *L, *R;
+ SCEV::NoWrapFlags Flags;
+ if (splitBinaryAdd(Less, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == More) {
+ C = -(LC->getAPInt());
+ return true;
+ }
+
+ if (splitBinaryAdd(More, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == Less) {
+ C = LC->getAPInt();
+ return true;
+ }
+
+ return false;
+}
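
// A toy analogue of the matching above (hypothetical standalone code, not
// the SCEV representation): expressions are modeled as a base symbol plus
// a constant, and More - Less is computable exactly when the bases agree:
#include <cassert>
#include <optional>

struct Expr { int Sym; long Add; };  // stands in for "X + C"

std::optional<long> constantDifference(Expr Less, Expr More) {
  if (Less.Sym != More.Sym)
    return std::nullopt;             // no common base: give up, like the
                                     // real function returning false
  return More.Add - Less.Add;        // (X + M) - (X + L) == M - L
}

int main() {
  assert(constantDifference({1, 3}, {1, 5}) == 2);  // (x+5) - (x+3)
  assert(!constantDifference({1, 3}, {2, 5}).has_value());
  return 0;
}
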
+
+bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS) {
+ if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
+ return false;
+
+ const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRecLHS)
+ return false;
+
+ const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
+ if (!AddRecFoundLHS)
+ return false;
+
+ // We'd like to let SCEV reason about control dependencies, so we constrain
+  // both inequalities to be about add recurrences on the same loop. This
+ // way we can use isLoopEntryGuardedByCond later.
+
+ const Loop *L = AddRecFoundLHS->getLoop();
+ if (L != AddRecLHS->getLoop())
+ return false;
+
+ // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
+ // ... (2)
+ //
+ // Informal proof for (2), assuming (1) [*]:
+ //
+ // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
+ //
+ // Then
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C
+ // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
+ // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
+ // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
+ // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
+ // <=> FoundLHS + C s< FoundRHS + C
+ //
+ // [*]: (1) can be proved by ruling out overflow.
+ //
+ // [**]: This can be proved by analyzing all the four possibilities:
+ // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
+ // (A s>= 0, B s>= 0).
+ //
+ // Note:
+ // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
+ // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
+ // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
+ // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
+ // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
+ // C)".
+
+ APInt LDiff, RDiff;
+ if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
+ !computeConstantDifference(FoundRHS, RHS, RDiff) ||
+ LDiff != RDiff)
+ return false;
+
+ if (LDiff == 0)
+ return true;
+
+ APInt FoundRHSLimit;
+
+ if (Pred == CmpInst::ICMP_ULT) {
+ FoundRHSLimit = -RDiff;
+ } else {
+ assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
+    FoundRHSLimit =
+        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ }
+
+ // Try to prove (1) or (2), as needed.
+ return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+ getConstant(FoundRHSLimit));
+}
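
// Facts (1) and (3) from the informal proof above, checked exhaustively
// over i8 -- a standalone sketch; uint8_t/int8_t arithmetic models the
// modular u< and s< comparisons (INT_MIN is i8 -128, i.e. 128 unsigned):
#include <cassert>
#include <cstdint>

int main() {
  for (int A = 0; A < 256; ++A)
    for (int B = 0; B < 256; ++B) {
      // (3): A s< B  <=>  (A + INT_MIN) u< (B + INT_MIN).
      assert((int8_t(A) < int8_t(B)) ==
             (uint8_t(A + 128) < uint8_t(B + 128)));
      for (int C = 0; C < 256; ++C)
        // (1): A u< B u< -C  =>  (A + C) u< (B + C).
        if (uint8_t(A) < uint8_t(B) && uint8_t(B) < uint8_t(-C))
          assert(uint8_t(A + C) < uint8_t(B + C));
    }
  return 0;
}
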
+
/// isImpliedCondOperands - Test whether the condition described by Pred,
/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
@@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
+ if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2) return nullptr;
-
- const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
- if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+ if (!Add || Add->getNumOperands() != 2 ||
+ !Add->getOperand(0)->isAllOnesValue())
return nullptr;
const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
-
- const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
- if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+ if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+ !AddRHS->getOperand(0)->isAllOnesValue())
return nullptr;
return AddRHS->getOperand(1);
@@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
if (!MaxExpr) return false;
- auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
- return It != MaxExpr->op_end();
+ return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
}
@@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
}
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+
+ // If both sides are affine addrecs for the same loop, with equal
+ // steps, and we know the recurrences don't wrap, then we only
+ // need to check the predicate on the starting values.
+
+ if (!ICmpInst::isRelational(Pred))
+ return false;
+
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!LAR)
+ return false;
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ if (!RAR)
+ return false;
+ if (LAR->getLoop() != RAR->getLoop())
+ return false;
+ if (!LAR->isAffine() || !RAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+ return false;
+
+ SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+ SCEV::FlagNSW : SCEV::FlagNUW;
+ if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+ return false;
+
+ return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
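
// A concrete instance of this rule (standalone sketch): {2,+,3} and
// {5,+,3} share a loop and a step, so the predicate on the starts,
// 2 s< 5, persists on every iteration:
#include <cassert>

int main() {
  long A = 2, B = 5;                 // starts of {2,+,3} and {5,+,3}
  for (int It = 0; It < 100; ++It) {
    assert(A < B);                   // the start predicate holds throughout
    A += 3;                          // equal steps keep the gap constant
    B += 3;
  }
  return 0;
}
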
/// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
/// expression?
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
auto IsKnownPredicateFull =
[this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
- IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+ IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+ isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
};
switch (Pred) {
@@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
!isa<SCEVConstant>(AddLHS->getOperand(0)))
return false;
- APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
// antecedent "`FoundLHS` `Pred` `FoundRHS`".
@@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
// for `LHS`:
- APInt Addend =
- cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+ APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
- APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
ConstantRange SatisfyingLHSRange =
ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
@@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
- const SCEV *One = getConstant(Stride->getType(), 1);
+ const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MaxRHS = getSignedRange(RHS).getSignedMax();
@@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
- const SCEV *One = getConstant(Stride->getType(), 1);
+ const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MinRHS = getSignedRange(RHS).getSignedMin();
@@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
// stride and presence of the equality in the comparison.
const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
- const SCEV *One = getConstant(Step->getType(), 1);
+ const SCEV *One = getOne(Step->getType());
Delta = Equality ? getAddExpr(Delta, Step)
: getAddExpr(Delta, getMinusSCEV(Step, One));
return getUDivExpr(Delta, Step);
@@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// overflow, in which case if RHS - Start is a constant, we don't need to
// do a max operation since we can just figure it out statically
if (NoWrap && isa<SCEVConstant>(Diff)) {
- APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();
if (D.isNegative())
End = Start;
} else
@@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// overflow, in which case if RHS - Start is a constant, we don't need to
// do a max operation since we can just figure it out statically
if (NoWrap && isa<SCEVConstant>(Diff)) {
- APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();
if (!D.isNegative())
End = Start;
} else
@@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
- Operands[0] = SE.getConstant(SC->getType(), 0);
+ Operands[0] = SE.getZero(SC->getType());
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
- if (const SCEVAddRecExpr *ShiftedAddRec =
- dyn_cast<SCEVAddRecExpr>(Shifted))
+ if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
- Range.subtract(SC->getValue()->getValue()), SE);
+ Range.subtract(SC->getAPInt()), SE);
// This is strange and shouldn't happen.
return SE.getCouldNotCompute();
}
// The only time we can solve this is when we have all constant indices.
// Otherwise, we cannot determine the overflow conditions.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!isa<SCEVConstant>(getOperand(i)))
- return SE.getCouldNotCompute();
-
+ if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
+ return SE.getCouldNotCompute();
// Okay at this point we know that all elements of the chrec are constants and
// that the start element is zero.
@@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(getType(), 0);
+ return SE.getZero(getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
@@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
APInt One(BitWidth,1);
- APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
// The exit value should be (End+A)/A.
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
FlagAnyWrap);
// Next, solve the constructed addrec
- std::pair<const SCEV *,const SCEV *> Roots =
- SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1) {
// Pick the smallest positive root value.
- if (ConstantInt *CB =
- dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
- R1->getValue(), R2->getValue()))) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
+ ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
@@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (Range.contains(R1Val->getValue())) {
// The next iteration must be out of the range...
ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+ ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (!Range.contains(R1Val->getValue()))
@@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If R1 was not in the range, then it is a good return value. Make
// sure that R1-1 WAS in the range though, just in case.
ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (Range.contains(R1Val->getValue()))
return R1;
@@ -7644,9 +8551,84 @@ struct SCEVCollectTerms {
}
bool isDone() const { return false; }
};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+      // Stop recursion: we found an AddRec, so do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+                              ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ if (isa<SCEVUnknown>(Op)) {
+ Operands.push_back(Op);
+ } else {
+ bool ContainsAddRec;
+        SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+        visitAll(Op, ContainsAddRecVisitor);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+ if (Operands.size() == 0)
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
}
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Terms) {
SmallVector<const SCEV *, 4> Strides;
@@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+ visitAll(Expr, MulCollector);
}
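
// A hypothetical driver for the collection above (assumes an LLVM tree to
// build against; SE and Expr stand for a live ScalarEvolution and a SCEV
// such as 8 * (100 + %p * %q * (%a + {0,+,1}_loop))):
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/raw_ostream.h"

void dumpParametricTerms(llvm::ScalarEvolution &SE, const llvm::SCEV *Expr) {
  llvm::SmallVector<const llvm::SCEV *, 4> Terms;
  SE.collectParametricTerms(Expr, Terms);
  // Expect the AddRec strides plus products such as "%p * %q".
  for (const llvm::SCEV *T : Terms) {
    T->print(llvm::errs());
    llvm::errs() << "\n";
  }
}
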
static bool findArrayDimensionsRec(ScalarEvolution &SE,
@@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-namespace {
-struct FindParameter {
- bool FoundParameter;
- FindParameter() : FoundParameter(false) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S)) {
- FoundParameter = true;
- // Stop recursion: we found a parameter.
- return false;
- }
- // Keep looking.
- return true;
- }
- bool isDone() const {
- // Stop recursion if we have found a parameter.
- return FoundParameter;
- }
-};
-}
-
// Returns true when S contains at least a SCEVUnknown parameter.
static inline bool
containsParameters(const SCEV *S) {
+ struct FindParameter {
+ bool FoundParameter;
+ FindParameter() : FoundParameter(false) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S)) {
+ FoundParameter = true;
+ // Stop recursion: we found a parameter.
+ return false;
+ }
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found a parameter.
+ return FoundParameter;
+ }
+ };
+
FindParameter F;
SCEVTraversal<FindParameter> ST(F);
ST.visitAll(S);
@@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- // Divide all terms by the element size.
+  // Try to divide all terms by the element size. If a term is not divisible
+  // by the element size, proceed with the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
- Term = Q;
+ if (!Q->isZero())
+ Term = Q;
}
SmallVector<const SCEV *, 4> NewTerms;
@@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions(
if (Sizes.empty())
return;
- if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
if (!AR->isAffine())
return;
@@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//
-ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
- LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
- initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
-}
-
-bool ScalarEvolution::runOnFunction(Function &F) {
- this->F = &F;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return false;
-}
-
-void ScalarEvolution::releaseMemory() {
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+ AssumptionCache &AC, DominatorTree &DT,
+ LoopInfo &LI)
+ : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+ CouldNotCompute(new SCEVCouldNotCompute()),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
+ FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+ : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+ CouldNotCompute(std::move(Arg.CouldNotCompute)),
+ ValueExprMap(std::move(Arg.ValueExprMap)),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+ ConstantEvolutionLoopExitValue(
+ std::move(Arg.ConstantEvolutionLoopExitValue)),
+ ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+ LoopDispositions(std::move(Arg.LoopDispositions)),
+ BlockDispositions(std::move(Arg.BlockDispositions)),
+ UnsignedRanges(std::move(Arg.UnsignedRanges)),
+ SignedRanges(std::move(Arg.SignedRanges)),
+ UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+ UniquePreds(std::move(Arg.UniquePreds)),
+ SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ FirstUnknown(Arg.FirstUnknown) {
+ Arg.FirstUnknown = nullptr;
+}
+
+ScalarEvolution::~ScalarEvolution() {
// Iterate through all the SCEVUnknown instances and call their
// destructors, so that they release their references to their values.
- for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
- U->~SCEVUnknown();
+ for (SCEVUnknown *U = FirstUnknown; U;) {
+ SCEVUnknown *Tmp = U;
+ U = U->Next;
+ Tmp->~SCEVUnknown();
+ }
FirstUnknown = nullptr;
ValueExprMap.clear();
// Free any extra memory created for ExitNotTakenInfo in the unlikely event
// that a loop had multiple computable exits.
- for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
- BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
- I != E; ++I) {
- I->second.clear();
- }
+ for (auto &BTCI : BackedgeTakenCounts)
+ BTCI.second.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
-
- BackedgeTakenCounts.clear();
- ConstantEvolutionLoopExitValue.clear();
- ValuesAtScopes.clear();
- LoopDispositions.clear();
- BlockDispositions.clear();
- UnsignedRanges.clear();
- SignedRanges.clear();
- UniqueSCEVs.clear();
- SCEVAllocator.Reset();
-}
-
-void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << "\n";
}
-void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+void ScalarEvolution::print(raw_ostream &OS) const {
// ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
@@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
OS << "Classifying expressions for: ";
- F->printAsOperand(OS, /*PrintType=*/false);
+ F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
- OS << *I << '\n';
+ for (Instruction &I : instructions(F))
+ if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+ OS << I << '\n';
OS << " --> ";
- const SCEV *SV = SE.getSCEV(&*I);
+ const SCEV *SV = SE.getSCEV(&I);
SV->print(OS);
if (!isa<SCEVCouldNotCompute>(SV)) {
OS << " U: ";
@@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
SE.getSignedRange(SV).print(OS);
}
- const Loop *L = LI->getLoopFor((*I).getParent());
+ const Loop *L = LI.getLoopFor(I.getParent());
const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
@@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
}
OS << "Determining loop execution counts for: ";
- F->printAsOperand(OS, /*PrintType=*/false);
+ F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
PrintLoopInfo(OS, &SE, *I);
}
@@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
- for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I)
- if (!isLoopInvariant(*I, L))
+ for (auto *Op : AR->operands())
+ if (!isLoopInvariant(Op, L))
return LoopVariant;
// Otherwise it's loop-invariant.
@@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr: {
- const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool HasVarying = false;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- LoopDisposition D = getLoopDisposition(*I, L);
+ for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+ LoopDisposition D = getLoopDisposition(Op, L);
if (D == LoopVariant)
return LoopVariant;
if (D == LoopComputable)
@@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
- if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
return LoopInvariant;
case scCouldNotCompute:
@@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
// produces the addrec's value is a PHI, and a PHI effectively properly
// dominates its entire containing block.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
- if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
}
// FALL THROUGH into SCEVNAryExpr handling.
@@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- BlockDisposition D = getBlockDisposition(*I, BB);
+ for (const SCEV *NAryOp : NAry->operands()) {
+ BlockDisposition D = getBlockDisposition(NAryOp, BB);
if (D == DoesNotDominateBlock)
return DoesNotDominateBlock;
if (D == DominatesBlock)
@@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
if (I->getParent() == BB)
return DominatesBlock;
- if (DT->properlyDominates(I->getParent(), BB))
+ if (DT.properlyDominates(I->getParent(), BB))
return ProperlyDominatesBlock;
return DoesNotDominateBlock;
}
@@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
-namespace {
-// Search for a SCEV expression node within an expression tree.
-// Implements SCEVTraversal::Visitor.
-struct SCEVSearch {
- const SCEV *Node;
- bool IsFound;
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ // Search for a SCEV expression node within an expression tree.
+ // Implements SCEVTraversal::Visitor.
+ struct SCEVSearch {
+ const SCEV *Node;
+ bool IsFound;
- SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+ SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
- bool follow(const SCEV *S) {
- IsFound |= (S == Node);
- return !IsFound;
- }
- bool isDone() const { return IsFound; }
-};
-}
+ bool follow(const SCEV *S) {
+ IsFound |= (S == Node);
+ return !IsFound;
+ }
+ bool isDone() const { return IsFound; }
+ };
-bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
SCEVSearch Search(Op);
visitAll(S, Search);
return Search.IsFound;
@@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
static void
getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
- for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
- getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
-
- std::string &S = Map[L];
- if (S.empty()) {
- raw_string_ostream OS(S);
- SE.getBackedgeTakenCount(L)->print(OS);
+ std::string &S = Map[L];
+ if (S.empty()) {
+ raw_string_ostream OS(S);
+ SE.getBackedgeTakenCount(L)->print(OS);
- // false and 0 are semantically equivalent. This can happen in dead loops.
- replaceSubString(OS.str(), "false", "0");
- // Remove wrap flags, their use in SCEV is highly fragile.
- // FIXME: Remove this when SCEV gets smarter about them.
- replaceSubString(OS.str(), "<nw>", "");
- replaceSubString(OS.str(), "<nsw>", "");
- replaceSubString(OS.str(), "<nuw>", "");
- }
+ // false and 0 are semantically equivalent. This can happen in dead loops.
+ replaceSubString(OS.str(), "false", "0");
+  // Remove wrap flags; their use in SCEV is highly fragile.
+ // FIXME: Remove this when SCEV gets smarter about them.
+ replaceSubString(OS.str(), "<nw>", "");
+ replaceSubString(OS.str(), "<nsw>", "");
+ replaceSubString(OS.str(), "<nuw>", "");
}
-}
-void ScalarEvolution::verifyAnalysis() const {
- if (!VerifySCEV)
- return;
+ for (auto *R : reverse(*L))
+ getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
+}
+void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
// Gather stringified backedge taken counts for all loops using SCEV's caches.
// FIXME: It would be much better to store actual values instead of strings,
// but SCEV pointers will change if we drop the caches.
VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
- // Gather stringified backedge taken counts for all loops without using
- // SCEV's caches.
- SE.releaseMemory();
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+ // Gather stringified backedge taken counts for all loops using a fresh
+ // ScalarEvolution object.
+ ScalarEvolution SE2(F, TLI, AC, DT, LI);
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
// Now compare whether they're the same with and without caches. This allows
// verifying that no pass changed the cache.
@@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const {
// TODO: Verify more things.
}
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+ AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+ initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+ SE.reset(new ScalarEvolution(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+ return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+ SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+ if (!VerifySCEV)
+ return;
+
+ SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+const SCEVPredicate *
+ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
+ const SCEVConstant *RHS) {
+ FoldingSetNodeID ID;
+ // Unique this node based on the arguments
+ ID.AddInteger(SCEVPredicate::P_Equal);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = nullptr;
+ if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
+ return S;
+ SCEVEqualPredicate *Eq = new (SCEVAllocator)
+ SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+ UniquePreds.InsertNode(Eq, IP);
+ return Eq;
+}
+
+namespace {
+class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ SCEVUnionPredicate &A) {
+ SCEVPredicateRewriter Rewriter(SE, A);
+ return Rewriter.visit(Scev);
+ }
+
+ SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P)
+ : SCEVRewriteVisitor(SE), P(P) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ auto ExprPreds = P.getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+
+ return Expr;
+ }
+
+private:
+ SCEVUnionPredicate &P;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev,
+ SCEVUnionPredicate &Preds) {
+ return SCEVPredicateRewriter::rewrite(Scev, *this, Preds);
+}
+
+/// SCEV predicates
+SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
+ SCEVPredicateKind Kind)
+ : FastID(ID), Kind(Kind) {}
+
+SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
+ const SCEVUnknown *LHS,
+ const SCEVConstant *RHS)
+ : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+
+bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
+ const auto *Op = dyn_cast<const SCEVEqualPredicate>(N);
+
+ if (!Op)
+ return false;
+
+ return Op->LHS == LHS && Op->RHS == RHS;
+}
+
+bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+
+const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+
+void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+}
+
+/// Union predicates don't get cached, so create a dummy set ID for them.
+SCEVUnionPredicate::SCEVUnionPredicate()
+ : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+
+bool SCEVUnionPredicate::isAlwaysTrue() const {
+ return all_of(Preds,
+ [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
+}
+
+ArrayRef<const SCEVPredicate *>
+SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
+ auto I = SCEVToPreds.find(Expr);
+ if (I == SCEVToPreds.end())
+ return ArrayRef<const SCEVPredicate *>();
+ return I->second;
+}
+
+bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N))
+ return all_of(Set->Preds,
+ [this](const SCEVPredicate *I) { return this->implies(I); });
+
+ auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
+ if (ScevPredsIt == SCEVToPreds.end())
+ return false;
+ auto &SCEVPreds = ScevPredsIt->second;
+
+ return any_of(SCEVPreds,
+ [N](const SCEVPredicate *I) { return I->implies(N); });
+}
+
+const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
+
+void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ for (auto Pred : Preds)
+ Pred->print(OS, Depth);
+}
+
+void SCEVUnionPredicate::add(const SCEVPredicate *N) {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) {
+ for (auto Pred : Set->Preds)
+ add(Pred);
+ return;
+ }
+
+ if (implies(N))
+ return;
+
+ const SCEV *Key = N->getExpr();
+ assert(Key && "Only SCEVUnionPredicate doesn't have an "
+                "associated expression!");
+
+ SCEVToPreds[Key].push_back(N);
+ Preds.push_back(N);
+}
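
// A toy analogue of the dedup performed by add() (hypothetical standalone
// code): the union records a predicate only if it is not already implied:
#include <cassert>
#include <vector>

struct Pred { int Sym; long Val; };  // "Sym == Val", like SCEVEqualPredicate

struct UnionPred {
  std::vector<Pred> Preds;
  bool implies(const Pred &N) const {
    for (const Pred &P : Preds)
      if (P.Sym == N.Sym && P.Val == N.Val)
        return true;                 // already a known fact
    return false;
  }
  void add(const Pred &N) {
    if (implies(N))
      return;                        // mirrors the early return above
    Preds.push_back(N);
  }
};

int main() {
  UnionPred U;
  U.add({1, 7});
  U.add({1, 7});                     // dropped: already implied
  U.add({2, 3});
  assert(U.Preds.size() == 2);
  return 0;
}
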
+
+PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE)
+ : SE(SE), Generation(0) {}
+
+const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
+ const SCEV *Expr = SE.getSCEV(V);
+ RewriteEntry &Entry = RewriteMap[Expr];
+
+ // If we already have an entry and the version matches, return it.
+ if (Entry.second && Generation == Entry.first)
+ return Entry.second;
+
+ // We found an entry but it's stale. Rewrite the stale entry
+  // according to the current predicate.
+ if (Entry.second)
+ Expr = Entry.second;
+
+ const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+ Entry = {Generation, NewSCEV};
+
+ return NewSCEV;
+}
+
+void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
+ if (Preds.implies(&Pred))
+ return;
+ Preds.add(&Pred);
+ updateGeneration();
+}
+
+const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
+ return Preds;
+}
+
+void PredicatedScalarEvolution::updateGeneration() {
+ // If the generation number wrapped recompute everything.
+ if (++Generation == 0) {
+ for (auto &II : RewriteMap) {
+ const SCEV *Rewritten = II.second.second;
+ II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+ }
+ }
+}
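
// A standalone analogue (hypothetical) of the generation-stamped cache
// managed above: each entry records the generation it was computed at, so
// bumping the generation lazily invalidates every cached rewrite:
#include <cassert>
#include <map>
#include <string>

struct RewriteCache {
  unsigned Generation = 0;
  std::map<std::string, std::pair<unsigned, std::string>> Map;

  std::string get(const std::string &Key) {
    auto &Entry = Map[Key];
    if (!Entry.second.empty() && Entry.first == Generation)
      return Entry.second;           // entry matches the current generation
    // Stale or missing: recompute (here, just tag with the generation).
    Entry = {Generation, Key + "@" + std::to_string(Generation)};
    return Entry.second;
  }
  void addFact() { ++Generation; }   // a new predicate invalidates rewrites
};

int main() {
  RewriteCache C;
  std::string First = C.get("x");
  assert(C.get("x") == First);       // served from the cache
  C.addFact();
  assert(C.get("x") != First);       // recomputed after invalidation
  return 0;
}
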
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6bc0d85..2e50c80 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,125 +19,42 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
-namespace {
- /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
- /// implementation that uses ScalarEvolution to answer queries.
- class ScalarEvolutionAliasAnalysis : public FunctionPass,
- public AliasAnalysis {
- ScalarEvolution *SE;
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
- initializeScalarEvolutionAliasAnalysisPass(
- *PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
-
- Value *GetBaseValue(const SCEV *S);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
- "ScalarEvolution-based Alias Analysis", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
- "ScalarEvolution-based Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
- return new ScalarEvolutionAliasAnalysis();
-}
-
-void
-ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<ScalarEvolution>();
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
-}
-
-bool
-ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
- InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
- SE = &getAnalysis<ScalarEvolution>();
- return false;
-}
-
-/// GetBaseValue - Given an expression, try to find a
-/// base value. Return null is none was found.
-Value *
-ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // In an addrec, assume that the base will be in the start, rather
- // than the step.
- return GetBaseValue(AR->getStart());
- } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
- // If there's a pointer operand, it'll be sorted at the end of the list.
- const SCEV *Last = A->getOperand(A->getNumOperands()-1);
- if (Last->getType()->isPointerTy())
- return GetBaseValue(Last);
- } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // This is a leaf node.
- return U->getValue();
- }
- // No Identified object found.
- return nullptr;
-}
-
-AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
if (LocA.Size == 0 || LocB.Size == 0)
return NoAlias;
- // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
- const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
- const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+ // This is SCEVAAResult. Get the SCEVs!
+ const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));
// If they evaluate to the same expression, it's a MustAlias.
- if (AS == BS) return MustAlias;
+ if (AS == BS)
+ return MustAlias;
// If something is known about the difference between the two addresses,
// see if it's enough to prove a NoAlias.
- if (SE->getEffectiveSCEVType(AS->getType()) ==
- SE->getEffectiveSCEVType(BS->getType())) {
- unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ if (SE.getEffectiveSCEVType(AS->getType()) ==
+ SE.getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
APInt ASizeInt(BitWidth, LocA.Size);
APInt BSizeInt(BitWidth, LocB.Size);
// Compute the difference between the two pointers.
- const SCEV *BA = SE->getMinusSCEV(BS, AS);
+ const SCEV *BA = SE.getMinusSCEV(BS, AS);
// Test whether the difference is known to be great enough that memory of
// the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
// are non-zero, which is special-cased above.
- if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
- (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
return NoAlias;
// Folding the subtraction while preserving range information can be tricky
@@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
// and try again to see if things fold better that way.
// Compute the difference between the two pointers.
- const SCEV *AB = SE->getMinusSCEV(AS, BS);
+ const SCEV *AB = SE.getMinusSCEV(AS, BS);
// Test whether the difference is known to be great enough that memory of
// the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
// are non-zero, which is special-cased above.
- if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
- (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))
return NoAlias;
}
@@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
return NoAlias;
// Forward the query to the next analysis.
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
+}
+
+/// Given an expression, try to find a base value.
+///
+/// Returns null if none was found.
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands() - 1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+ // No Identified object found.
+ return nullptr;
+}
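
// The overlap test in SCEVAAResult::alias above, checked exhaustively over
// u8 "addresses" -- a standalone sketch; the difference B - A is exactly
// known here, so the unsigned range's min and max coincide, and sizes are
// non-zero to match the special-cased empty locations:
#include <cassert>
#include <cstdint>

static bool overlaps(uint8_t A, int SA, uint8_t B, int SB) {
  for (int I = 0; I < SA; ++I)
    for (int J = 0; J < SB; ++J)
      if (uint8_t(A + I) == uint8_t(B + J))
        return true;
  return false;
}

int main() {
  for (int A = 0; A < 256; ++A)
    for (int B = 0; B < 256; ++B)
      for (int SA = 1; SA <= 4; ++SA)
        for (int SB = 1; SB <= 4; ++SB) {
          uint8_t BA = uint8_t(B - A);
          // ASize u<= (B - A) and -BSize u>= (B - A) imply NoAlias.
          if (uint8_t(SA) <= BA && uint8_t(-SB) >= BA)
            assert(!overlaps(A, SA, B, SB));
        }
  return 0;
}
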
+
+SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<ScalarEvolutionAnalysis>(F));
+}
+
+char SCEVAA::PassID;
+
+char SCEVAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+
+FunctionPass *llvm::createSCEVAAWrapperPass() {
+ return new SCEVAAWrapperPass();
+}
+
+SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) {
+ initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SCEVAAWrapperPass::runOnFunction(Function &F) {
+ Result.reset(
+ new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<ScalarEvolutionWrapperPass>().getSE()));
+ return false;
+}
+
+void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index fee2a2d..921403d 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// Create a new cast, and leave the old cast in place in case
// it is being used as an insert point. Clear its operand
// so that it doesn't hold anything live.
- Ret = CastInst::Create(Op, V, Ty, "", IP);
+ Ret = CastInst::Create(Op, V, Ty, "", &*IP);
Ret->takeName(CI);
CI->replaceAllUsesWith(Ret);
CI->setOperand(0, UndefValue::get(V->getType()));
@@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// Create a new cast.
if (!Ret)
- Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
// We assert at the end of the function since IP might point to an
// instruction with different dominance properties than a cast
// (an invoke for example) and not dominate BIP (but the cast does).
- assert(SE.DT->dominates(Ret, BIP));
+ assert(SE.DT.dominates(Ret, &*BIP));
rememberInstruction(Ret);
return Ret;
}
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+ BasicBlock *MustDominate) {
+ BasicBlock::iterator IP = ++I->getIterator();
+ if (auto *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+
+ while (isa<PHINode>(IP))
+ ++IP;
+
+ while (IP->isEHPad()) {
+ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+ ++IP;
+ } else if (isa<CatchSwitchInst>(IP)) {
+ IP = MustDominate->getFirstInsertionPt();
+ } else {
+ llvm_unreachable("unexpected eh pad!");
+ }
+ }
+
+ return IP;
+}
+
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
@@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
while ((isa<BitCastInst>(IP) &&
isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
cast<BitCastInst>(IP)->getOperand(0) != A) ||
- isa<DbgInfoIntrinsic>(IP) ||
- isa<LandingPadInst>(IP))
+ isa<DbgInfoIntrinsic>(IP))
++IP;
return ReuseOrCreateCast(A, Ty, Op, IP);
}
// Cast the instruction immediately after the instruction.
Instruction *I = cast<Instruction>(V);
- BasicBlock::iterator IP = I; ++IP;
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
- IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
- ++IP;
+ BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
ScanLimit++;
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
IP->getOperand(1) == RHS)
- return IP;
+ return &*IP;
if (IP == BlockBegin) break;
}
}
@@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// If we haven't found this binop, insert it.
@@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
// Check for divisibility.
if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
ConstantInt *CI =
- ConstantInt::get(SE.getContext(),
- C->getValue()->getValue().sdiv(
- FC->getValue()->getValue()));
+ ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
// If the quotient is zero and the remainder is non-zero, reject
// the value at this scale. It will be considered for subsequent
// smaller scales.
if (!CI->isZero()) {
const SCEV *Div = SE.getConstant(CI);
S = Div;
- Remainder =
- SE.getAddExpr(Remainder,
- SE.getConstant(C->getValue()->getValue().srem(
- FC->getValue()->getValue())));
+ Remainder = SE.getAddExpr(
+ Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
return true;
}
}
@@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
// of the given factor. If so, we can factor it.
const SCEVConstant *FC = cast<SCEVConstant>(Factor);
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ if (!C->getAPInt().srem(FC->getAPInt())) {
SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[0] = SE.getConstant(
- C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+ NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
S = SE.getMulExpr(NewMulOps);
return true;
}
@@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- const SCEV *Op = Ops[i];
+ for (const SCEV *Op : Ops) {
const SCEV *Remainder = SE.getConstant(Ty, 0);
if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
// Op now has ElSize factored out.
@@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
} else {
// The operand was not divisible, so add it to the list of operands
// we'll scan next iteration.
- NewOps.push_back(Ops[i]);
+ NewOps.push_back(Op);
}
}
// If we made any changes, update Ops.
@@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
assert(!isa<Instruction>(V) ||
- SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
// Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
ScanLimit++;
if (IP->getOpcode() == Instruction::GetElementPtr &&
IP->getOperand(0) == V && IP->getOperand(1) == Idx)
- return IP;
+ return &*IP;
if (IP == BlockBegin) break;
}
}
@@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// Emit a GEP.
@@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
- bool AnyIndexNotLoopInvariant = false;
- for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
- E = GepIndices.end(); I != E; ++I)
- if (!L->isLoopInvariant(*I)) {
- AnyIndexNotLoopInvariant = true;
- break;
- }
+ bool AnyIndexNotLoopInvariant =
+ std::any_of(GepIndices.begin(), GepIndices.end(),
+ [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
if (AnyIndexNotLoopInvariant)
break;
@@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
@@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
- GepIndices,
- "scevgep");
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
@@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
// Test whether we've already computed the most relevant loop for this SCEV.
- std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
- RelevantLoops.insert(std::make_pair(S, nullptr));
+ auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
if (!Pair.second)
return Pair.first->second;
@@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return nullptr;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
- return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ return Pair.first->second = SE.LI.getLoopFor(I->getParent());
// A non-instruction has no relevant loops.
return nullptr;
}
@@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
const Loop *L = nullptr;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
L = AR->getLoop();
- for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
- I != E; ++I)
- L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ for (const SCEV *Op : N->operands())
+ L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
return RelevantLoops[N] = L;
}
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
@@ -621,10 +625,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return RelevantLoops[C] = Result;
}
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
- const Loop *Result =
- PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
- getRelevantLoop(D->getRHS()),
- *SE.DT);
+ const Loop *Result = PickMostRelevantLoop(
+ getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
return RelevantLoops[D] = Result;
}
llvm_unreachable("Unexpected SCEV type!");
@@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
Value *Sum = nullptr;
- for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
- I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
const Loop *CurLoop = I->first;
const SCEV *Op = I->second;
if (!Sum) {
@@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
Value *Prod = nullptr;
- for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
- I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) {
- const SCEV *Op = I->second;
+ for (const auto &I : OpsAndLoops) {
+ const SCEV *Op = I.second;
if (!Prod) {
// This is the first operand. Just expand it.
Prod = expand(Op);
@@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
Value *LHS = expandCodeFor(S->getLHS(), Ty);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
- const APInt &RHS = SC->getValue()->getValue();
+ const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
ConstantInt::get(Ty, RHS.logBase2()));
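A worked instance of the shortcut above, with an assumed divisor of 8:

    // Expanding (%x /u 8) emits "lshr i64 %x, 3" rather than a udiv,
    // because 8 is a power of two and its log base 2 is 3.
    APInt Eight(/*numBits=*/64, /*val=*/8);
    assert(Eight.isPowerOf2() && Eight.logBase2() == 3);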
@@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
for (User::op_iterator OI = IncV->op_begin()+1,
OE = IncV->op_end(); OI != OE; ++OI)
if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos))
+ if (!SE.DT.dominates(OInst, IVIncInsertPos))
return false;
}
// Advance to the next instruction.
@@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
case Instruction::Add:
case Instruction::Sub: {
Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
- if (!OInst || SE.DT->dominates(OInst, InsertPos))
+ if (!OInst || SE.DT.dominates(OInst, InsertPos))
return dyn_cast<Instruction>(IncV->getOperand(0));
return nullptr;
}
case Instruction::BitCast:
return dyn_cast<Instruction>(IncV->getOperand(0));
case Instruction::GetElementPtr:
- for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
- I != E; ++I) {
+ for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
if (isa<Constant>(*I))
continue;
if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
- if (!SE.DT->dominates(OInst, InsertPos))
+ if (!SE.DT.dominates(OInst, InsertPos))
return nullptr;
}
if (allowScale) {
@@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
- if (SE.DT->dominates(IncV, InsertPos))
+ if (SE.DT.dominates(IncV, InsertPos))
return true;
// InsertPos must itself dominate IncV so that IncV's new position satisfies
// its existing users.
- if (isa<PHINode>(InsertPos)
- || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ if (isa<PHINode>(InsertPos) ||
+ !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
return false;
// Check that the chain of IV operands leading back to Phi can be hoisted.
@@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
// IncV is safe to hoist.
IVIncs.push_back(IncV);
IncV = Oper;
- if (SE.DT->dominates(IncV, InsertPos))
+ if (SE.DT.dominates(IncV, InsertPos))
break;
}
- for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
- E = IVIncs.rend(); I != E; ++I) {
+ for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
(*I)->moveBefore(InsertPos);
}
return true;
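With the new LCSSA check, a successful hoist now guarantees more than dominance; a sketch of a typical call site (IncV and InsertPos are hypothetical values):

    if (Expander.hoistIVInc(IncV, InsertPos)) {
      // IncV and its chain of IV operands were moved before InsertPos, and
      // the move is known to preserve LCSSA form, so IncV can be reused here.
    }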
@@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
}
/// \brief Check whether we can cheaply express the requested SCEV in terms of
-/// the available PHI SCEV by truncation and/or invertion of the step.
+/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Phi,
const SCEVAddRecExpr *Requested,
@@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Only try partially matching scevs that need truncation and/or
// step-inversion if we know this loop is outside the current loop.
- bool TryNonMatchingSCEV = IVIncInsertLoop &&
- SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+ bool TryNonMatchingSCEV =
+ IVIncInsertLoop &&
+ SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (!SE.isSCEVable(PN->getType()))
+ for (auto &I : *L->getHeader()) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN || !SE.isSCEVable(PN->getType()))
continue;
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
@@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Potentially, move the increment. We have made sure in
// isExpandedAddRecExprPHI or hoistIVInc that this is possible.
if (L == IVIncInsertLoop)
- hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+ hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
@@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PostIncLoops.clear();
// Expand code for the start value.
- Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
- L->getHeader()->begin());
+ Value *StartV =
+ expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
// StartV must be hoisted into L's preheader to dominate the new phi.
assert(!isa<Instruction>(StartV) ||
- SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
- L->getHeader()));
+ SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
// Expand code for the step value. Do this before creating the PHI so that PHI
// reuse code doesn't see an incomplete PHI.
@@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PostIncLoops.count(L)) {
PostIncLoopSet Loops;
Loops.insert(L);
- Normalized =
- cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
- nullptr, Loops, SE, *SE.DT));
+ Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse(
+ Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));
}
// Strip off any non-loop-dominating component from the addrec start.
@@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- if (isa<Instruction>(Result)
- && !SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) {
+ if (isa<Instruction>(Result) &&
+ !SE.DT.dominates(cast<Instruction>(Result),
+ &*Builder.GetInsertPoint())) {
// The induction variable's postinc expansion does not dominate this use.
// IVUsers tries to prevent this case, so it is rare. However, it can
// happen when an IVUser outside the loop is not dominated by the latch
@@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
{
// Expand the step somewhere that dominates the loop header.
BuilderType::InsertPointGuard Guard(Builder);
- StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
}
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
@@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
- std::next(BasicBlock::iterator(cast<Instruction>(V)));
- BuilderType::InsertPointGuard Guard(Builder);
- while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
- isa<LandingPadInst>(NewInsertPt))
- ++NewInsertPt;
+ findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- NewInsertPt);
+ &*NewInsertPt);
return V;
}
@@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
BasicBlock *Header = L->getHeader();
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
- Header->begin());
+ &Header->front());
rememberInstruction(CanonicalIV);
SmallSet<BasicBlock *, 4> PredSeen;
@@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
- Builder.SetInsertPoint(IP->getParent(), IP);
+ assert(IP);
+ Builder.SetInsertPoint(IP);
return expandCodeFor(SH, Ty);
}
@@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
- Instruction *InsertPt = Builder.GetInsertPoint();
- for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
L = L->getParentLoop())
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
@@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) {
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
- InsertPt = L->getHeader()->getFirstInsertionPt();
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
}
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
- InsertPt = L->getHeader()->getFirstInsertionPt();
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt != Builder.GetInsertPoint()
&& (isInsertedInstruction(InsertPt)
|| isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = std::next(BasicBlock::iterator(InsertPt));
+ InsertPt = &*std::next(InsertPt->getIterator());
}
break;
}
// Check to see if we already expanded this here.
- std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
- I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
if (I != InsertedExpressions.end())
return I->second;
BuilderType::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+ Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
Value *V = visit(S);
@@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
// Emit code for it.
BuilderType::InsertPointGuard Guard(Builder);
- PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
- L->getHeader()->begin()));
+ PHINode *V =
+ cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
return V;
}
@@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
- Phis.push_back(Phi);
+ for (auto &I : *L->getHeader()) {
+ if (auto *PN = dyn_cast<PHINode>(&I))
+ Phis.push_back(PN);
+ else
+ break;
}
+
if (TTI)
std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
@@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
// Process phis from wide to narrow. Map wide phis to their truncation
// so narrow phis can reuse them.
- for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
- PEnd = Phis.end(); PIter != PEnd; ++PIter) {
- PHINode *Phi = *PIter;
+ for (PHINode *Phi : Phis) {
+ auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
+ if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC))
+ return V;
+ if (!SE.isSCEVable(PN->getType()))
+ return nullptr;
+ auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+ if (!Const)
+ return nullptr;
+ return Const->getValue();
+ };
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
+ if (Value *V = SimplifyPHINode(Phi)) {
+ if (V->getType() != Phi->getType())
+ continue;
Phi->replaceAllUsesWith(V);
DeadInsts.emplace_back(Phi);
++NumElim;
@@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
if (OrigInc->getType() != IsomorphicInc->getType()) {
Instruction *IP = nullptr;
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
- IP = PN->getParent()->getFirstInsertionPt();
+ IP = &*PN->getParent()->getFirstInsertionPt();
else
IP = OrigInc->getNextNode();
@@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
- IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+ IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
}
@@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
+Value *SCEVExpander::findExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ using namespace llvm::PatternMatch;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+  // Look for a suitable value in simple conditions at the loop exits.
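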
+ for (BasicBlock *BB : ExitingBlocks) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+ BasicBlock *TrueBB, *FalseBB;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ TrueBB, FalseBB)))
+ continue;
+
+ if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+ return LHS;
+
+ if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+ return RHS;
+ }
+
+ // There is potential to make this significantly smarter, but this simple
+ // heuristic already gets some interesting cases.
+
+  // Cannot find a suitable value.
+ return nullptr;
+}
+
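To make the heuristic concrete, this is the shape of exit condition it matches (assumed IR, for illustration only; UseExistingValue is hypothetical):

    //   exiting:
    //     %cmp = icmp ult i64 %iv.next, %n
    //     br i1 %cmp, label %loop, label %exit
    // If S is the SCEV of %n and %n dominates At, findExistingExpansion
    // returns %n, so callers can reuse it instead of expanding S again:
    if (Value *Reuse = findExistingExpansion(S, At, L))
      UseExistingValue(Reuse);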
bool SCEVExpander::isHighCostExpansionHelper(
- const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ const SCEV *S, Loop *L, const Instruction *At,
+ SmallPtrSetImpl<const SCEV *> &Processed) {
+
+  // If we can find an existing value for this SCEV available at the point "At",
+ // then consider the expression cheap.
+ if (At && findExistingExpansion(S, At, L) != nullptr)
+ return false;
// Zero/One operand expressions
switch (S->getSCEVType()) {
@@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
case scConstant:
return false;
case scTruncate:
- return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
- Processed);
+ return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
+ L, At, Processed);
case scZeroExtend:
return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
- L, Processed);
+ L, At, Processed);
case scSignExtend:
return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
- L, Processed);
+ L, At, Processed);
}
if (!Processed.insert(S).second)
@@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
// If the divisor is a power of two and the SCEV type fits in a native
- // integer, consider the divison cheap irrespective of whether it occurs in
+ // integer, consider the division cheap irrespective of whether it occurs in
// the user code since it can be lowered into a right shift.
if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
- if (SC->getValue()->getValue().isPowerOf2()) {
+ if (SC->getAPInt().isPowerOf2()) {
const DataLayout &DL =
L->getHeader()->getParent()->getParent()->getDataLayout();
unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (!ExitingBB)
return true;
- BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
- if (!ExitingBI || !ExitingBI->isConditional())
+  // At the beginning of this function we already tried to find an existing
+  // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+  // pattern involving division. This is just a simple search heuristic.
+ if (!At)
+ At = &ExitingBB->back();
+ if (!findExistingExpansion(
+ SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
-
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
- if (!OrigCond)
- return true;
-
- const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
- RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
- if (RHS != S) {
- const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
- LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
- if (LHS != S)
- return true;
- }
}
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
@@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive to rematerialize.
if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- if (isHighCostExpansionHelper(*I, L, Processed))
+ for (auto *Op : NAry->operands())
+ if (isHighCostExpansionHelper(Op, L, At, Processed))
return true;
- }
}
// If we haven't recognized an expensive SCEV pattern, assume it's an
@@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper(
return false;
}
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+ Instruction *IP) {
+ assert(IP);
+ switch (Pred->getKind()) {
+ case SCEVPredicate::P_Union:
+ return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+ case SCEVPredicate::P_Equal:
+ return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+ }
+ llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+ Instruction *IP) {
+ Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+ Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+ Builder.SetInsertPoint(IP);
+ auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+ return I;
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+ Instruction *IP) {
+ auto *BoolType = IntegerType::get(IP->getContext(), 1);
+ Value *Check = ConstantInt::getNullValue(BoolType);
+
+ // Loop over all checks in this set.
+ for (auto Pred : Union->getPredicates()) {
+ auto *NextCheck = expandCodeForPredicate(Pred, IP);
+ Builder.SetInsertPoint(IP);
+ Check = Builder.CreateOr(Check, NextCheck);
+ }
+
+ return Check;
+}
+
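A sketch of how these expanders are meant to be driven by a loop-versioning client (all names other than expandCodeForPredicate are hypothetical):

    // Emit all runtime checks for the union predicate in the preheader; a
    // true result means at least one assumption failed at runtime.
    Instruction *Loc = Preheader->getTerminator();
    Value *AnyFailed = Expander.expandCodeForPredicate(&UnionPred, Loc);
    // The client then rewires the preheader branch (not shown) so that a
    // failed check falls back to the unversioned loop.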
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index b238fe4..b7fd5d5 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
SmallVector<const SCEV *, 8> Operands;
const Loop *L = AR->getLoop();
// The addrec conceptually uses its operands at loop entry.
- Instruction *LUser = L->getHeader()->begin();
+ Instruction *LUser = &L->getHeader()->front();
// Transform each operand.
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I) {
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index a5fca3e..029997a 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -32,22 +32,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
// A handy option for disabling scoped no-alias functionality. The same effect
// can also be achieved by stripping the associated metadata tags from IR, but
// this option is sometimes more convenient.
-static cl::opt<bool>
-EnableScopedNoAlias("enable-scoped-noalias", cl::init(true));
+static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
+ cl::init(true));
namespace {
/// AliasScopeNode - This is a simple wrapper around an MDNode which provides
@@ -57,7 +58,7 @@ class AliasScopeNode {
const MDNode *Node;
public:
- AliasScopeNode() : Node(0) {}
+ AliasScopeNode() : Node(nullptr) {}
explicit AliasScopeNode(const MDNode *N) : Node(N) {}
/// getNode - Get the MDNode for this AliasScopeNode.
@@ -70,79 +71,74 @@ public:
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
};
+} // end of anonymous namespace
-/// ScopedNoAliasAA - This is a simple alias analysis
-/// implementation that uses scoped-noalias metadata to answer queries.
-class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis {
-public:
- static char ID; // Class identification, replacement for typeinfo
- ScopedNoAliasAA() : ImmutablePass(ID) {
- initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
- }
+AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::alias(LocA, LocB);
- bool doInitialization(Module &M) override;
+ // Get the attached MDNodes.
+ const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
+ const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias;
-protected:
- bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
- void collectMDInDomain(const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const;
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
-};
-} // End of anonymous namespace
+ if (!mayAliasInScopes(AScopes, BNoAlias))
+ return NoAlias;
-// Register this pass...
-char ScopedNoAliasAA::ID = 0;
-INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias",
- "Scoped NoAlias Alias Analysis", false, true, false)
+ if (!mayAliasInScopes(BScopes, ANoAlias))
+ return NoAlias;
-ImmutablePass *llvm::createScopedNoAliasAAPass() {
- return new ScopedNoAliasAA();
+ // If they may alias, chain to the next AliasAnalysis.
+ return AAResultBase::alias(LocA, LocB);
}
-bool ScopedNoAliasAA::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
+ LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-void
-ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS1, CS2);
+
+ if (!mayAliasInScopes(
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
-void
-ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const {
+void ScopedNoAliasAAResult::collectMDInDomain(
+ const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const {
for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i)
if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))
if (AliasScopeNode(MD).getDomain() == Domain)
Nodes.insert(MD);
}
-bool
-ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
- const MDNode *NoAlias) const {
+bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias) const {
if (!Scopes || !NoAlias)
return true;
@@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::alias(LocA, LocB);
-
- // Get the attached MDNodes.
- const MDNode *AScopes = LocA.AATags.Scope,
- *BScopes = LocB.AATags.Scope;
+ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScopedNoAliasAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
- const MDNode *ANoAlias = LocA.AATags.NoAlias,
- *BNoAlias = LocB.AATags.NoAlias;
+char ScopedNoAliasAA::PassID;
- if (!mayAliasInScopes(AScopes, BNoAlias))
- return NoAlias;
-
- if (!mayAliasInScopes(BScopes, ANoAlias))
- return NoAlias;
+char ScopedNoAliasAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
- // If they may alias, chain to the next AliasAnalysis.
- return AliasAnalysis::alias(LocA, LocB);
+ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() {
+ return new ScopedNoAliasAAWrapperPass();
}
-bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) {
+ initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
-AliasAnalysis::ModRefBehavior
-ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) {
- return AliasAnalysis::getModRefBehavior(CS);
+bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ScopedNoAliasAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
}
-AliasAnalysis::ModRefBehavior
-ScopedNoAliasAA::getModRefBehavior(const Function *F) {
- return AliasAnalysis::getModRefBehavior(F);
+bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
}
-AliasAnalysis::ModRefResult
-ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::getModRefInfo(CS, Loc);
-
- if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
- LLVMContext::MD_noalias)))
- return NoModRef;
-
- if (!mayAliasInScopes(
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- Loc.AATags.NoAlias))
- return NoModRef;
-
- return AliasAnalysis::getModRefInfo(CS, Loc);
-}
-
-AliasAnalysis::ModRefResult
-ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::getModRefInfo(CS1, CS2);
-
- if (!mayAliasInScopes(
- CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return NoModRef;
-
- if (!mayAliasInScopes(
- CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return NoModRef;
-
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-
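For readers new to the metadata, a minimal picture of what the rewritten result class decides (assumed IR, illustrative only):

    //   store float %f, float* %p, !alias.scope !2   ; !2 = !{!1}, !1 = scope
    //   %v = load float, float* %q, !noalias !2      ; in some domain !0
    // Every scope of the store (!1) appears in the load's noalias list, so
    // mayAliasInScopes(StoreScopes, LoadNoAlias) is false and alias() above
    // returns NoAlias without delegating to the next analysis in the chain.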
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
index edd82f5..f5a927b 100644
--- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) {
void SparseSolver::Print(Function &F, raw_ostream &OS) const {
OS << "\nFUNCTION: " << F.getName() << "\n";
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(BB))
+ for (auto &BB : F) {
+ if (!BBExecutable.count(&BB))
OS << "INFEASIBLE: ";
OS << "\t";
- if (BB->hasName())
- OS << BB->getName() << ":\n";
+ if (BB.hasName())
+ OS << BB.getName() << ":\n";
else
OS << "; anon bb\n";
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- LatticeFunc->PrintValue(getLatticeState(I), OS);
- OS << *I << "\n";
+ for (auto &I : BB) {
+ LatticeFunc->PrintValue(getLatticeState(&I), OS);
+ OS << I << "\n";
}
OS << "\n";
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 635c50c..e00f4ae 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -61,10 +61,19 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
#endif // !NDEBUG
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
+ TLI.setUnavailable(LibFunc::ldexp);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ }
+
// There are no library implementations of memcpy and memset for AMD GPUs and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
- T.getArch() == Triple::amdgcn) {
+ T.getArch() == Triple::amdgcn ||
+ T.getArch() == Triple::wasm32 ||
+ T.getArch() == Triple::wasm64) {
TLI.setUnavailable(LibFunc::memcpy);
TLI.setUnavailable(LibFunc::memset);
TLI.setUnavailable(LibFunc::memset_pattern16);
@@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
// memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ // All versions of watchOS support it.
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
TLI.setUnavailable(LibFunc::memset_pattern16);
} else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
TLI.setUnavailable(LibFunc::memset_pattern16);
- } else {
+ } else if (!T.isWatchOS()) {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
@@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
break;
case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
TLI.setUnavailable(LibFunc::exp10l);
- if (T.isOSVersionLT(7, 0)) {
+ if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
+ (T.isOSVersionLT(9, 0) &&
+ (T.getArch() == Triple::x86 ||
+ T.getArch() == Triple::x86_64)))) {
TLI.setUnavailable(LibFunc::exp10);
TLI.setUnavailable(LibFunc::exp10f);
} else {
@@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
// Linux (GLIBC):
// http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
- // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c
// http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
switch (T.getOS()) {
case Triple::Darwin:
case Triple::MacOSX:
case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
case Triple::FreeBSD:
case Triple::Linux:
break;
@@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
// ffsll is available on at least FreeBSD and Linux (GLIBC):
- // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c
// http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
case Triple::FreeBSD:
case Triple::Linux:
break;
@@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::ffsll);
}
+ // The following functions are available on at least FreeBSD:
+ // http://svn.freebsd.org/base/head/lib/libc/string/fls.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c
+ if (!T.isOSFreeBSD()) {
+ TLI.setUnavailable(LibFunc::fls);
+ TLI.setUnavailable(LibFunc::flsl);
+ TLI.setUnavailable(LibFunc::flsll);
+ }
+
// The following functions are available on at least Linux:
if (!T.isOSLinux()) {
TLI.setUnavailable(LibFunc::dunder_strdup);
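Downstream passes consult these availability tables before emitting library calls; a minimal sketch (emitMemCpyCall is a hypothetical helper):

    // On wasm32/wasm64 and the AMD GPU targets handled above, has() is now
    // false for memcpy, steering transforms away from emitting the call.
    if (TLI.has(LibFunc::memcpy))
      emitMemCpyCall(Builder, Dst, Src, Len);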
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7d1c3fb..9c1d3fd 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
return *this;
}
-unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy) const {
- return TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy) const {
+ int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
- int NumArgs) const {
- return TTIImpl->getCallCost(FTy, NumArgs);
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
+ int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const {
- return TTIImpl->getCallCost(F, Arguments);
+int TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getCallCost(F, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const {
- return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+int TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getUserCost(const User *U) const {
- return TTIImpl->getUserCost(U);
+int TargetTransformInfo::getUserCost(const User *U) const {
+ int Cost = TTIImpl->getUserCost(U);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
bool TargetTransformInfo::hasBranchDivergence() const {
@@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
Scale, AddrSpace);
}
-bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
- int Consecutive) const {
- return TTIImpl->isLegalMaskedStore(DataType, Consecutive);
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
+ return TTIImpl->isLegalMaskedStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
+ return TTIImpl->isLegalMaskedLoad(DataType);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
- int Consecutive) const {
- return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
+bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
+ return TTIImpl->isLegalMaskedGather(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
+  return TTIImpl->isLegalMaskedScatter(DataType);
}
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
@@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) const {
- return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
+bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
+ return TTIImpl->enableInterleavedAccessVectorization();
+}
+
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
@@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
-unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
- return TTIImpl->getFPOpCost(Ty);
+int TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ int Cost = TTIImpl->getFPOpCost(Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(Imm, Ty);
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
@@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
-unsigned TargetTransformInfo::getArithmeticInstrCost(
+int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) const {
- return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index, Type *SubTp) const {
- return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
+ Type *SubTp) const {
+ int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- return TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
- return TTIImpl->getCFInstrCost(Opcode);
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+ int Cost = TTIImpl->getCFInstrCost(Opcode);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- return TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost =
+ TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) const {
+ int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getInterleavedMemoryOpCost(
+int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace) const {
- return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- return TTIImpl->getCallInstrCost(F, RetTy, Tys);
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
-unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
- bool IsComplex) const {
- return TTIImpl->getAddressComputationCost(Tp, IsComplex);
+int TargetTransformInfo::getAddressComputationCost(Type *Tp,
+ bool IsComplex) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const {
- return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const {
+ int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned
@@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
-bool TargetTransformInfo::hasCompatibleFunctionAttributes(
- const Function *Caller, const Function *Callee) const {
- return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee);
+bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ return TTIImpl->areInlineCompatible(Caller, Callee);
}
TargetTransformInfo::Concept::~Concept() {}
@@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
TargetIRAnalysis::TargetIRAnalysis(
- std::function<Result(Function &)> TTICallback)
+ std::function<Result(const Function &)> TTICallback)
: TTICallback(TTICallback) {}
-TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
+TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) {
return TTICallback(F);
}
char TargetIRAnalysis::PassID;
-TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
}
@@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
*PassRegistry::getPassRegistry());
}
-TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) {
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
TTI = TIRA.run(F);
return *TTI;
}
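Every hunk above follows one pattern: the cost entry points switch from unsigned to int, capture the implementation's answer, and assert it is non-negative before returning, presumably to give clients headroom for signed cost arithmetic while the asserts keep implementations honest. A minimal standalone sketch of that wrapper idiom, with a hypothetical Impl and toy cost values rather than the real TTIImpl interface:

#include <cassert>

// Hypothetical stand-in for the TTI implementation object; the cost
// hook and its values are made up for illustration.
struct Impl {
  int getMemoryOpCost(unsigned Opcode) const { return Opcode ? 4 : 1; }
};

struct CostFacade {
  Impl I;
  // Same shape as the patched wrappers: forward, then check the sign
  // before handing the now-signed cost back to callers.
  int getMemoryOpCost(unsigned Opcode) const {
    int Cost = I.getMemoryOpCost(Opcode);
    assert(Cost >= 0 && "cost hooks should not produce negative costs!");
    return Cost;
  }
};

int main() {
  CostFacade F;
  return F.getMemoryOpCost(0) == 1 ? 0 : 1;
}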
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 4e9c6f6..805f3ef 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -121,15 +121,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SetVector.h"
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
@@ -138,199 +136,138 @@ using namespace llvm;
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
namespace {
- /// TBAANode - This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAANode {
- const MDNode *Node;
-
- public:
- TBAANode() : Node(nullptr) {}
- explicit TBAANode(const MDNode *N) : Node(N) {}
-
- /// getNode - Get the MDNode for this TBAANode.
- const MDNode *getNode() const { return Node; }
-
- /// getParent - Get this TBAANode's Alias tree parent.
- TBAANode getParent() const {
- if (Node->getNumOperands() < 2)
- return TBAANode();
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
- if (!P)
- return TBAANode();
- // Ok, this node has a valid parent. Return it.
- return TBAANode(P);
- }
-
- /// TypeIsImmutable - Test if this TBAANode represents a type for objects
- /// which are not modified (by any means) in the context where this
- /// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
- if (Node->getNumOperands() < 3)
- return false;
- ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
- if (!CI)
- return false;
- return CI->getValue()[0];
- }
- };
-
- /// This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAAStructTagNode {
- /// This node should be created with createTBAAStructTagNode.
- const MDNode *Node;
+/// TBAANode - This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAANode {
+ const MDNode *Node;
+
+public:
+ TBAANode() : Node(nullptr) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
- public:
- explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
- /// Get the MDNode for this TBAAStructTagNode.
- const MDNode *getNode() const { return Node; }
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTagNode {
+ /// This node should be created with createTBAAStructTagNode.
+ const MDNode *Node;
- const MDNode *getBaseType() const {
- return dyn_cast_or_null<MDNode>(Node->getOperand(0));
- }
- const MDNode *getAccessType() const {
- return dyn_cast_or_null<MDNode>(Node->getOperand(1));
- }
- uint64_t getOffset() const {
- return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
- }
- /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
- /// objects which are not modified (by any means) in the context where this
- /// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
- if (Node->getNumOperands() < 4)
- return false;
- ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
- if (!CI)
- return false;
- return CI->getValue()[0];
- }
- };
-
- /// This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAAStructTypeNode {
- /// This node should be created with createTBAAStructTypeNode.
- const MDNode *Node;
-
- public:
- TBAAStructTypeNode() : Node(nullptr) {}
- explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
-
- /// Get the MDNode for this TBAAStructTypeNode.
- const MDNode *getNode() const { return Node; }
-
- /// Get this TBAAStructTypeNode's field in the type DAG with
- /// given offset. Update the offset to be relative to the field type.
- TBAAStructTypeNode getParent(uint64_t &Offset) const {
- // Parent can be omitted for the root node.
- if (Node->getNumOperands() < 2)
- return TBAAStructTypeNode();
+public:
+ explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
- // Fast path for a scalar type node and a struct type node with a single
- // field.
- if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2
- ? 0
- : mdconst::extract<ConstantInt>(Node->getOperand(2))
- ->getZExtValue();
- Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
- if (!P)
- return TBAAStructTypeNode();
- return TBAAStructTypeNode(P);
- }
+ /// Get the MDNode for this TBAAStructTagNode.
+ const MDNode *getNode() const { return Node; }
- // Assume the offsets are in order. We return the previous field if
- // the current offset is bigger than the given offset.
- unsigned TheIdx = 0;
- for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
- ->getZExtValue();
- if (Cur > Offset) {
- assert(Idx >= 3 &&
- "TBAAStructTypeNode::getParent should have an offset match!");
- TheIdx = Idx - 2;
- break;
- }
- }
- // Move along the last field.
- if (TheIdx == 0)
- TheIdx = Node->getNumOperands() - 2;
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
- ->getZExtValue();
+ const MDNode *getBaseType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+ }
+ const MDNode *getAccessType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ uint64_t getOffset() const {
+ return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ }
+ /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+ /// objects which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
+
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTypeNode {
+ /// This node should be created with createTBAAStructTypeNode.
+ const MDNode *Node;
+
+public:
+ TBAAStructTypeNode() : Node(nullptr) {}
+ explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTypeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// given offset. Update the offset to be relative to the field type.
+ TBAAStructTypeNode getParent(uint64_t &Offset) const {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
+ return TBAAStructTypeNode();
+
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
+ if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
return TBAAStructTypeNode(P);
}
- };
-}
-
-namespace {
- /// TypeBasedAliasAnalysis - This is a simple alias analysis
- /// implementation that uses TypeBased to answer queries.
- class TypeBasedAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- TypeBasedAliasAnalysis() : ImmutablePass(ID) {
- initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
- bool doInitialization(Module &M) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
+ // Assume the offsets are in order. We return the previous field if
+ // the current offset is bigger than the given offset.
+ unsigned TheIdx = 0;
+ for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
+ ->getZExtValue();
+ if (Cur > Offset) {
+ assert(Idx >= 3 &&
+ "TBAAStructTypeNode::getParent should have an offset match!");
+ TheIdx = Idx - 2;
+ break;
+ }
}
-
- bool Aliases(const MDNode *A, const MDNode *B) const;
- bool PathAliases(const MDNode *A, const MDNode *B) const;
-
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char TypeBasedAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
- "Type-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
- return new TypeBasedAliasAnalysis();
-}
-
-bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
-}
-
-void
-TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
+ // Move along the last field.
+ if (TheIdx == 0)
+ TheIdx = Node->getNumOperands() - 2;
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
+ ->getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+};
}
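TBAAStructTypeNode::getParent above encodes a struct type as (field type, offset) operand pairs sorted by offset, selects the last field that starts at or before the queried offset, and rebases the offset into that field's type. A standalone sketch of the same lookup over a plain vector (hypothetical TypeNode and layout, not the MDNode encoding):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical stand-in for a TBAA struct type node: each field is a
// (child type, offset-within-this-type) pair, sorted by offset.
struct TypeNode {
  const char *Name;
  std::vector<std::pair<const TypeNode *, uint64_t>> Fields;
};

// Mirrors getParent: pick the field containing Offset, then rebase
// Offset to be relative to that field's type.
const TypeNode *getParent(const TypeNode *N, uint64_t &Offset) {
  if (N->Fields.empty())
    return nullptr;                    // scalar node: nothing to descend into
  size_t Idx = 0;
  for (size_t I = 1; I < N->Fields.size(); ++I)
    if (N->Fields[I].second <= Offset) // last field at or before Offset
      Idx = I;
  Offset -= N->Fields[Idx].second;     // rebase into the field's type
  return N->Fields[Idx].first;
}

int main() {
  TypeNode Char{"char", {}};
  TypeNode Int{"int", {}};
  // Layout of: struct S { char c; int i; }; with int at offset 4.
  TypeNode S{"S", {{&Char, 0}, {&Int, 4}}};
  uint64_t Off = 4;
  const TypeNode *T = getParent(&S, Off);
  printf("%s at rebased offset %llu\n", T->Name, (unsigned long long)Off);
  return 0;                            // prints: int at rebased offset 0
}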
/// Check the first operand of the tbaa tag node; if it is an MDNode, we treat
@@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) {
return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
}
-/// Aliases - Test whether the type represented by A may alias the
-/// type represented by B.
-bool
-TypeBasedAliasAnalysis::Aliases(const MDNode *A,
- const MDNode *B) const {
- // Make sure that both MDNodes are struct-path aware.
- if (isStructPathTBAA(A) && isStructPathTBAA(B))
- return PathAliases(A, B);
-
- // Keep track of the root node for A and B.
- TBAANode RootA, RootB;
-
- // Climb the tree from A to see if we reach B.
- for (TBAANode T(A); ; ) {
- if (T.getNode() == B)
- // B is an ancestor of A.
- return true;
-
- RootA = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Climb the tree from B to see if we reach A.
- for (TBAANode T(B); ; ) {
- if (T.getNode() == A)
- // A is an ancestor of B.
- return true;
-
- RootB = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-/// Test whether the struct-path tag represented by A may alias the
-/// struct-path tag represented by B.
-bool
-TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
- const MDNode *B) const {
- // Verify that both input nodes are struct-path aware.
- assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
- assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
-
- // Keep track of the root node for A and B.
- TBAAStructTypeNode RootA, RootB;
- TBAAStructTagNode TagA(A), TagB(B);
-
- // TODO: We need to check if AccessType of TagA encloses AccessType of
- // TagB to support aggregate AccessType. If yes, return true.
-
- // Start from the base type of A, follow the edge with the correct offset in
- // the type DAG and adjust the offset until we reach the base type of B or
- // until we reach the Root node.
- // Compare the adjusted offset once we have the same base.
-
- // Climb the type DAG from base type of A to see if we reach base type of B.
- const MDNode *BaseA = TagA.getBaseType();
- const MDNode *BaseB = TagB.getBaseType();
- uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
- for (TBAAStructTypeNode T(BaseA); ; ) {
- if (T.getNode() == BaseB)
- // Base type of A encloses base type of B, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootA = T;
- // Follow the edge with the correct offset, OffsetA will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetA);
- if (!T.getNode())
- break;
- }
-
- // Reset OffsetA and climb the type DAG from base type of B to see if we reach
- // base type of A.
- OffsetA = TagA.getOffset();
- for (TBAAStructTypeNode T(BaseB); ; ) {
- if (T.getNode() == BaseA)
- // Base type of B encloses base type of A, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootB = T;
- // Follow the edge with the correct offset, OffsetB will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetB);
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableTBAA)
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
// Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
// be conservative.
const MDNode *AM = LocA.AATags.TBAA;
- if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ if (!AM)
+ return AAResultBase::alias(LocA, LocB);
const MDNode *BM = LocB.AATags.TBAA;
- if (!BM) return AliasAnalysis::alias(LocA, LocB);
+ if (!BM)
+ return AAResultBase::alias(LocA, LocB);
// If they may alias, chain to the next AliasAnalysis.
if (Aliases(AM, BM))
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
// Otherwise return a definitive result.
return NoAlias;
}
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
if (!EnableTBAA)
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
const MDNode *M = Loc.AATags.TBAA;
- if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ if (!M)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
@@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
(isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
-AliasAnalysis::ModRefBehavior
-TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+FunctionModRefBehavior
+TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefBehavior(CS);
+ return AAResultBase::getModRefBehavior(CS);
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
(isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
}
-AliasAnalysis::ModRefBehavior
-TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
// Functions don't have metadata. Just chain to the next implementation.
- return AliasAnalysis::getModRefBehavior(F);
+ return AAResultBase::getModRefBehavior(F);
}
-AliasAnalysis::ModRefResult
-TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
- return NoModRef;
+ return MRI_NoModRef;
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-AliasAnalysis::ModRefResult
-TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(CS1, CS2);
if (const MDNode *M1 =
CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 =
CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
- return NoModRef;
+ return MRI_NoModRef;
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
bool MDNode::isTBAAVtableAccess() const {
if (!isStructPathTBAA(this)) {
- if (getNumOperands() < 1) return false;
+ if (getNumOperands() < 1)
+ return false;
if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
+ if (Tag1->getString() == "vtable pointer")
+ return true;
}
return false;
}
// For struct-path aware TBAA, we use the access type of the tag.
- if (getNumOperands() < 2) return false;
+ if (getNumOperands() < 2)
+ return false;
MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
- if (!Tag) return false;
+ if (!Tag)
+ return false;
if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
+ if (Tag1->getString() == "vtable pointer")
+ return true;
}
- return false;
+ return false;
}
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
@@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A) return nullptr;
+ if (!A)
+ return nullptr;
B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B) return nullptr;
+ if (!B)
+ return nullptr;
}
SmallSetVector<MDNode *, 4> PathA;
@@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
int IB = PathB.size() - 1;
MDNode *Ret = nullptr;
- while (IA >= 0 && IB >=0) {
+ while (IA >= 0 && IB >= 0) {
if (PathA[IA] == PathB[IB])
Ret = PathA[IA];
else
@@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.NoAlias = getMetadata(LLVMContext::MD_noalias);
}
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
+ // Make sure that both MDNodes are struct-path aware.
+ if (isStructPathTBAA(A) && isStructPathTBAA(B))
+ return PathAliases(A, B);
+
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A);;) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B);;) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
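For scalar (non-struct-path) tags, Aliases above just climbs parent links from each node looking for the other, and only reports no-alias when neither encloses the other and both climbs end at the same root. A minimal sketch of that two-way climb on a parent-pointer tree (hypothetical Node type and char/int/float hierarchy):

#include <cstdio>

struct Node { const Node *Parent; };   // hypothetical scalar TBAA node

bool mayAlias(const Node *A, const Node *B) {
  const Node *RootA = nullptr, *RootB = nullptr;
  for (const Node *T = A; T; T = T->Parent) {
    if (T == B)
      return true;                     // B is an ancestor of A
    RootA = T;
  }
  for (const Node *T = B; T; T = T->Parent) {
    if (T == A)
      return true;                     // A is an ancestor of B
    RootB = T;
  }
  // Different roots: potentially unrelated type systems, stay conservative.
  // Same root: proven disjoint.
  return RootA != RootB;
}

int main() {
  Node Root{nullptr}, CharT{&Root}, IntT{&CharT}, FloatT{&CharT};
  printf("%d %d\n", mayAlias(&IntT, &CharT),   // 1: char encloses int
                    mayAlias(&IntT, &FloatT)); // 0: siblings under one root
  return 0;
}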
+
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
+ // Verify that both input nodes are struct-path aware.
+ assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
+ assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+
+ // Keep track of the root node for A and B.
+ TBAAStructTypeNode RootA, RootB;
+ TBAAStructTagNode TagA(A), TagB(B);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Start from the base type of A, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the base type of B or
+ // until we reach the Root node.
+ // Compare the adjusted offset once we have the same base.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ const MDNode *BaseA = TagA.getBaseType();
+ const MDNode *BaseB = TagB.getBaseType();
+ uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+ for (TBAAStructTypeNode T(BaseA);;) {
+ if (T.getNode() == BaseB)
+ // Base type of A encloses base type of B, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootA = T;
+ // Follow the edge with the correct offset, OffsetA will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetA);
+ if (!T.getNode())
+ break;
+ }
+
+ // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+ // base type of A.
+ OffsetA = TagA.getOffset();
+ for (TBAAStructTypeNode T(BaseB);;) {
+ if (T.getNode() == BaseA)
+ // Base type of B encloses base type of A, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootB = T;
+ // Follow the edge with the correct offset, OffsetB will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetB);
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char TypeBasedAA::PassID;
+
+char TypeBasedAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa",
+ "Type-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
+ return new TypeBasedAAWrapperPass();
+}
+
+TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
+ initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new TypeBasedAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index fa0d779..314ec9c 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -43,7 +44,7 @@ const unsigned MaxDepth = 6;
/// Enable an experimental feature to leverage information about dominating
/// conditions to compute known bits. The individual options below control how
-/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
/// run into compile time problems when testing, scale them back and report
/// your findings.
static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
/// conditions?
static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
cl::Hidden,
- cl::init(20000));
+ cl::init(20));
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
- cl::Hidden, cl::init(2000));
+ cl::Hidden, cl::init(20));
// If true, don't consider only compares whose only use is a branch.
static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
@@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool NonNegative, Negative;
+ ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+ return NonNegative;
+}
+
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q);
+
+bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonEqual(V1, V2, DL, Query(AC,
+ safeCxtI(V1, safeCxtI(V2, CxtI)),
+ DT));
+}
+
static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
unsigned Depth, const Query &Q);
@@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
}
// If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
KnownOne.clearAllBits();
@@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
}
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
- APInt &KnownZero) {
+ APInt &KnownZero,
+ APInt &KnownOne) {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1);
- // Use the high end of the ranges to find leading zeros.
- unsigned MinLeadingZeros = BitWidth;
+ KnownZero.setAllBits();
+ KnownOne.setAllBits();
+
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Lower =
mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
ConstantInt *Upper =
mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
- if (Range.isWrappedSet())
- MinLeadingZeros = 0; // -1 has no zeros
- unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
- MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
- }
- KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+ // The first CommonPrefixBits of all values in Range are equal.
+ unsigned CommonPrefixBits =
+ (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+
+ APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
+ KnownOne &= Range.getUnsignedMax() & Mask;
+ KnownZero &= ~Range.getUnsignedMax() & Mask;
+ }
}
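The rewritten computeKnownBitsFromRangeMetadata derives known bits from the common leading prefix of each range's unsigned min and max, then intersects those bits across all ranges, rather than only counting leading zeros from the upper bound. A standalone 64-bit sketch of the same computation (plain uint64_t instead of APInt, C++20 <bit>, made-up ranges):

#include <bit>
#include <cstdint>
#include <cstdio>

// Known bits implied by !range metadata: within one half-open [Lo, Hi)
// range, every value agrees with the maximum on the bits where the
// unsigned min and max share a common prefix. Intersect across ranges.
static void knownBitsFromRanges(const uint64_t (*R)[2], int N,
                                uint64_t &KnownZero, uint64_t &KnownOne) {
  KnownZero = ~0ULL;
  KnownOne = ~0ULL;
  for (int I = 0; I < N; ++I) {
    uint64_t Min = R[I][0], Max = R[I][1] - 1; // inclusive maximum
    int Prefix = std::countl_zero(Min ^ Max);  // 64 when Min == Max
    uint64_t Mask = (Prefix == 64) ? ~0ULL : ~(~0ULL >> Prefix);
    KnownOne &= Max & Mask;
    KnownZero &= ~Max & Mask;
  }
}

int main() {
  uint64_t Ranges[][2] = {{16, 24}, {20, 32}}; // two [Lo, Hi) ranges
  uint64_t KZ, KO;
  knownBitsFromRanges(Ranges, 2, KZ, KO);
  // Both ranges lie in [16, 32): bit 4 known one, bits 5..63 known zero.
  printf("KnownOne=%#llx KnownZeroLowByte=%#llx\n",
         (unsigned long long)KO, (unsigned long long)(KZ & 0xFF));
  return 0;
}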
static bool isEphemeralValueOf(Instruction *I, const Value *E) {
@@ -374,20 +398,20 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
SmallPtrSet<const Value *, 32> Visited;
SmallPtrSet<const Value *, 16> EphValues;
+ // The instruction defining an assumption's condition itself is always
+ // considered ephemeral to that assumption (even if it has other
+ // non-ephemeral users). See r246696's test case for an example.
+ if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ return true;
+
while (!WorkSet.empty()) {
const Value *V = WorkSet.pop_back_val();
if (!Visited.insert(V).second)
continue;
// If all uses of this value are ephemeral, then so is this value.
- bool FoundNEUse = false;
- for (const User *I : V->users())
- if (!EphValues.count(I)) {
- FoundNEUse = true;
- break;
- }
-
- if (!FoundNEUse) {
+ if (std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); })) {
if (V == E)
return true;
@@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
// of the block); the common case is that the assume will come first.
for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
IE = Inv->getParent()->end(); I != IE; ++I)
- if (I == Q.CxtI)
+ if (&*I == Q.CxtI)
return true;
// The context must come first...
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
if (!Q.DT || !Q.CxtI)
return;
Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+ // The context instruction might be in a statically unreachable block. If
+ // so, asking dominator queries may yield surprising results. (e.g. the block
+ // may not have a dom tree node)
+ if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+ return;
// Avoid useless work
if (auto VI = dyn_cast<Instruction>(V))
@@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
// instruction. Finding a condition where one path dominates the context
// isn't enough because both the true and false cases could merge before
// the context instruction we're actually interested in. Instead, we need
- // to ensure that the taken *edge* dominates the context instruction.
+ // to ensure that the taken *edge* dominates the context instruction. We
+ // know that the edge must be reachable since we started from a reachable
+ // block.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
@@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
}
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively, and a
+// shift amount, compute the implied known-zero or known-one bits of the shift
+// operator's result respectively for that shift amount. The results from calling
+// KZF and KOF are conservatively combined for all permitted shift amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout &DL, unsigned Depth, const Query &Q,
+ KZFunctor KZF, KOFunctor KOF) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ KnownZero = KZF(KnownZero, ShiftAmt);
+ KnownOne = KOF(KnownOne, ShiftAmt);
+ return;
+ }
+
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+ // Note: We cannot use KnownZero.getLimitedValue() here, because if
+ // BitWidth > 64 and any upper bits are known, we'll end up returning the
+ // limit value (which implies all bits are known).
+ uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+ // It would be clearer to use the two temporaries for this calculation,
+ // but we reuse the APInts here to avoid unnecessary allocations.
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+ // If we know the shifter operand is nonzero, we can sometimes infer more
+ // known bits. However this is expensive to compute, so be lazy about it and
+ // only compute it when absolutely necessary.
+ Optional<bool> ShifterOperandIsNonZero;
+
+ // Early exit if we can't constrain any well-defined shift amount.
+ if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (!*ShifterOperandIsNonZero)
+ return;
+ }
+
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+ // Combine the shifted known input bits only for those shift amounts
+ // compatible with its known constraints.
+ if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+ continue;
+ if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+ continue;
+ // If we know the shifter is nonzero, we may be able to infer more known
+ // bits. This check is sunk down as far as possible to avoid the expensive
+ // call to isKnownNonZero if the cheaper checks above fail.
+ if (ShiftAmt == 0) {
+ if (!ShifterOperandIsNonZero.hasValue())
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (*ShifterOperandIsNonZero)
+ continue;
+ }
+
+ KnownZero &= KZF(KnownZero2, ShiftAmt);
+ KnownOne &= KOF(KnownOne2, ShiftAmt);
+ }
+
+ // If there are no compatible shift amounts, then we've proven that the shift
+ // amount must be >= the BitWidth, and the result is undefined. We could
+ // return anything we'd like, but we need to make sure the sets of known bits
+ // stay disjoint (it should be better for some other code to actually
+ // propagate the undef than to pick a value here using known bits).
+ if ((KnownZero & KnownOne) != 0)
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+}
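computeKnownBitsFromShiftOperator's non-constant path enumerates every shift amount compatible with the amount's own known bits and intersects the per-amount results from KZF and KOF. A standalone 8-bit sketch for shl, omitting the nonzero-shifter refinement and the final disjointness reset (hypothetical inputs):

#include <cstdint>
#include <cstdio>

// Known bits of (X << S) when the shift amount S is only partially
// known: enumerate every S compatible with S's known bits, compute the
// implied result bits per amount, and keep only what all agree on.
static void knownBitsShl(uint8_t XKZ, uint8_t XKO,  // known bits of X
                         uint8_t SKZ, uint8_t SKO,  // known bits of S
                         uint8_t &KZ, uint8_t &KO) {
  KZ = KO = 0xFF;
  for (unsigned S = 0; S < 8; ++S) {
    if (S & SKZ)                 // uses a bit of S known to be zero
      continue;
    if ((S | SKO) != S)          // misses a bit of S known to be one
      continue;
    KZ &= (uint8_t)((XKZ << S) | ((1u << S) - 1)); // low S bits become 0
    KO &= (uint8_t)(XKO << S);
  }
}

int main() {
  // Hypothetical inputs: X known to be exactly 1, S known to be 0 or 1.
  uint8_t KZ, KO;
  knownBitsShl(/*XKZ=*/0xFE, /*XKO=*/0x01, /*SKZ=*/0xFE, /*SKO=*/0x00,
               KZ, KO);
  printf("KZ=%#x KO=%#x\n", KZ, KO); // KZ=0xfc: result is 1 or 2, so < 4
  return 0;
}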
+
static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
@@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
@@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
KnownZero |= KnownZero2;
+
+ // and(x, add (x, -1)) is a common idiom that always clears the low bit;
+ // here we handle the more general case of adding any odd number by
+ // matching the form add(x, add(x, y)) where y is odd.
+ // TODO: This could be generalized to clearing any bit set in y where the
+ // following bit is known to be unset in y.
+ Value *Y = nullptr;
+ if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
+ m_Value(Y))) ||
+ match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
+ m_Value(Y)))) {
+ APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0);
+ computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q);
+ if (KnownOne3.countTrailingOnes() > 0)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, 1);
+ }
break;
}
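The new And case relies on x & (x + y) having a clear low bit whenever y is odd: an odd addend always flips bit 0, so x and x + y disagree there. A quick exhaustive check of that identity over 8-bit values:

#include <cassert>

int main() {
  // Exhaustive 8-bit check: an odd addend flips bit 0, so x and x + y
  // differ in the low bit and their AND has bit 0 clear.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 1; y < 256; y += 2)
      assert(((x & (x + y)) & 1u) == 0);
  return 0;
}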
case Instruction::Or: {
@@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::BitCast: {
Type *SrcTy = I->getOperand(0)->getType();
- if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
+ SrcTy->isFloatingPointTy()) &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
@@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
break;
}
- case Instruction::Shl:
+ case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- }
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne << ShiftAmt;
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
- case Instruction::LShr:
+ }
+ case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- // high bits known zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownZero, ShiftAmt) |
+ // High bits known zero.
+ APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownOne, ShiftAmt);
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
- case Instruction::AShr:
+ }
+ case Instruction::AShr: {
// (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownZero, ShiftAmt);
+ };
- // Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownOne, ShiftAmt);
+ };
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
- KnownZero |= HighBits;
- else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
- KnownOne |= HighBits;
- }
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
+ }
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
@@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
case Instruction::Call:
case Instruction::Invoke:
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
// If a range metadata is attached to this IntrinsicInst, intersect the
// explicit range specified by the metadata and the implicit range of
// the intrinsic.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::bswap:
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ KnownZero |= KnownZero2.byteSwap();
+ KnownOne |= KnownOne2.byteSwap();
+ break;
case Intrinsic::ctlz:
case Intrinsic::cttz: {
unsigned LowBits = Log2_32(BitWidth)+1;
@@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break;
}
case Intrinsic::ctpop: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ // We can bound the space the count needs. Also, bits known to be zero
+ // can't contribute to the population.
+ unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
+ unsigned LeadingZeros =
+ APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
+ assert(LeadingZeros <= BitWidth);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
+ KnownOne &= ~KnownZero;
+ // TODO: we could bound KnownOne using the lower bound on the number
+ // of bits which might be set provided by popcnt KnownOne2.
+ break;
+ }
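The reworked ctpop case bounds the count by the number of operand bits not already known zero, and converts that bound into high known-zero bits of the result. A standalone 32-bit sketch (plain uint32_t, C++20 <bit>, made-up known-zero mask):

#include <bit>
#include <cstdint>
#include <cstdio>

// High known-zero bits of ctpop(X), given X's known-zero mask: the
// count is at most the number of possibly-set bits, so it inherits
// that bound's leading zeros.
static uint32_t ctpopKnownZero(uint32_t XKnownZero) {
  unsigned BitsPossiblySet = 32 - std::popcount(XKnownZero);
  unsigned LeadingZeros = std::countl_zero((uint32_t)BitsPossiblySet);
  return LeadingZeros ? ~0u << (32 - LeadingZeros) : 0;
}

int main() {
  // Hypothetical: bits 8..31 of X known zero => ctpop(X) <= 8, which
  // fits in 4 bits, so bits 4..31 of the count are known zero.
  printf("%#x\n", ctpopKnownZero(0xFFFFFF00u)); // prints 0xfffffff0
  return 0;
}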
+ case Intrinsic::fabs: {
+ Type *Ty = II->getType();
+ APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
+ KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
@@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
}
+static unsigned getAlignment(const Value *V, const DataLayout &DL) {
+ unsigned Align = 0;
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ Align = GO->getAlignment();
+ if (Align == 0) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (GVar->isStrongDefinitionForLinker())
+ Align = DL.getPreferredAlignment(GVar);
+ else
+ Align = DL.getABITypeAlignment(ObjectType);
+ }
+ }
+ }
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
+
+ if (!Align && A->hasStructRetAttr()) {
+ // An sret parameter has at least the ABI alignment of the return type.
+ Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ Align = DL.getABITypeAlignment(EltTy);
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ Align = AI->getAlignment();
+ else if (auto CS = ImmutableCallSite(V))
+ Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ Align = CI->getLimitedValue();
+ }
+
+ return Align;
+}
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
@@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned BitWidth = KnownZero.getBitWidth();
assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->isFPOrFPVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
- "Not integer or pointer type!");
+ "Not integer, floating point, or pointer type!");
assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
@@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- // The address of an aligned GlobalValue has trailing zeros.
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- unsigned Align = GO->getAlignment();
- if (Align == 0) {
- if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
- Type *ObjectType = GVar->getType()->getElementType();
- if (ObjectType->isSized()) {
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (GVar->isStrongDefinitionForLinker())
- Align = DL.getPreferredAlignment(GVar);
- else
- Align = DL.getABITypeAlignment(ObjectType);
- }
- }
- }
- if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth,
- countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
- return;
- }
-
- if (Argument *A = dyn_cast<Argument>(V)) {
- unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
-
- if (!Align && A->hasStructRetAttr()) {
- // An sret parameter has at least the ABI alignment of the return type.
- Type *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- Align = DL.getABITypeAlignment(EltTy);
- }
-
- if (Align)
- KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
-
- // Don't give up yet... there might be an assumption that provides more
- // information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
-
- // Or a dominating condition for that matter
- if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
- computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
- Depth, Q);
- return;
- }
-
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
@@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Aligned pointers have trailing zero bits, so refine the KnownZero set.
+ if (V->getType()->isPointerTy()) {
+ unsigned Align = getAlignment(V, DL);
+ if (Align)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ }
+
// computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
// strictly refines KnownZero and KnownOne. Therefore, we run them after
// computeKnownBitsFromOperator.
@@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
if (XKnownNegative)
return true;
+
+ // If the shifter operand is a constant, and all of the bits shifted
+ // out are known to be zero, and X is known non-zero, then at least one
+ // non-zero bit must remain.
+ if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+
+ auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
+ // Is there a known one in the portion not shifted out?
+ if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+ return true;
+ // Are all the bits to be shifted out known zero?
+ if (KnownZero.countTrailingOnes() >= ShiftVal)
+ return isKnownNonZero(X, DL, Depth, Q);
+ }
}
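The constant-shift branch above reasons about right shifts: a known one at or above bit C survives X >> C, and if the C shifted-out low bits are all known zero then a non-zero X stays non-zero. An exhaustive 8-bit check of the second claim, using actual zero low bits as the stand-in for known-zero bits:

#include <cassert>

int main() {
  // If the low C bits of X are all zero and X != 0, then X >> C != 0:
  // every set bit of X sits at position >= C and survives the shift.
  for (unsigned x = 1; x < 256; ++x)
    for (unsigned c = 0; c < 8; ++c)
      if ((x & ((1u << c) - 1)) == 0) // shifted-out bits are zero
        assert((x >> c) != 0);
  return 0;
}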
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
@@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
return true;
}
+ // PHI
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Try and detect a recurrence that monotonically increases from a
+ // starting value, as these are common as induction variables.
+ if (PN->getNumIncomingValues() == 2) {
+ Value *Start = PN->getIncomingValue(0);
+ Value *Induction = PN->getIncomingValue(1);
+ if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
+ std::swap(Start, Induction);
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
+ if (!C->isZero() && !C->isNegative()) {
+ ConstantInt *X;
+ if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
+ !X->isNegative())
+ return true;
+ }
+ }
+ }
+ }
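The PHI case pattern-matches induction variables: a recurrence that starts at a positive constant and repeatedly adds a non-negative constant with nuw/nsw stays positive by induction (the base value is non-zero, and a no-wrap non-negative step can never bring it back to zero). A runtime analogue with hypothetical constants, where the no-wrap obligation is met by construction rather than by IR flags:

#include <cassert>

int main() {
  // Analogue of the recognized pattern: i starts at a positive constant
  // and each iteration adds a non-negative constant without wrapping
  // (the IR pattern requires nuw/nsw), so i can never reach 0.
  unsigned i = 3;                 // Start
  for (int Step = 0; Step < 1000; ++Step) {
    assert(i != 0);
    i += 4;                       // Induction increment, no wrap here
  }
  return 0;
}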
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
@@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
return KnownOne != 0;
}
+/// Return true if V2 == V1 + X, where X is known non-zero.
+static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return false;
+ Value *Op = nullptr;
+ if (V2 == BO->getOperand(0))
+ Op = BO->getOperand(1);
+ else if (V2 == BO->getOperand(1))
+ Op = BO->getOperand(0);
+ else
+ return false;
+ return isKnownNonZero(Op, DL, 0, Q);
+}
+
+/// Return true if it is known that V1 != V2.
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ if (V1->getType()->isVectorTy() || V1 == V2)
+ return false;
+ if (V1->getType() != V2->getType())
+ // We can't look through casts yet.
+ return false;
+ if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q))
+ return true;
+
+ if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+ // Are any known bits in V1 contradictory to known bits in V2? If V1
+ // has a known zero where V2 has a known one, they must not be equal.
+ auto BitWidth = Ty->getBitWidth();
+ APInt KnownZero1(BitWidth, 0);
+ APInt KnownOne1(BitWidth, 0);
+ computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q);
+ APInt KnownZero2(BitWidth, 0);
+ APInt KnownOne2(BitWidth, 0);
+ computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q);
+
+ auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+ if (OppositeBits.getBoolValue())
+ return true;
+ }
+ return false;
+}
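isKnownNonEqual's fallback is the OppositeBits test: two integers must differ if any bit is known zero in one and known one in the other. A standalone sketch with hypothetical known-bit masks:

#include <cstdint>
#include <cstdio>

// V1 != V2 whenever some bit is known zero in one and known one in the
// other; this mirrors the OppositeBits expression in the patch.
static bool knownNonEqual(uint32_t KZ1, uint32_t KO1,
                          uint32_t KZ2, uint32_t KO2) {
  return ((KZ1 & KO2) | (KZ2 & KO1)) != 0;
}

int main() {
  // Hypothetical: V1 known even (bit 0 zero), V2 known odd (bit 0 one).
  printf("%d\n", knownNonEqual(0x1, 0x0, 0x0, 0x1)); // prints 1
  return 0;
}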
+
/// Return true if 'V & Mask' is known to be zero. We use this predicate to
/// simplify operations downstream. Mask is known to be zero for bits that V
/// cannot have.
@@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
}
// This insert value inserts something else than what we are looking for.
- // See if the (aggregrate) value inserted into has the value we are
+ // See if the (aggregate) value inserted into has the value we are
// looking for, then.
if (*req_idx != *i)
return FindInsertedValue(I->getAggregateOperand(), idx_range,
@@ -2560,7 +2798,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
}
if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
- // If we're extracting a value from an aggregrate that was extracted from
+ // If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
// However, we will need to chain I's indices with the requested indices.
@@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
}
-/// Return true if Value is always a dereferenceable pointer.
-///
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+ const DataLayout &DL) {
+ APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+ if (!BaseAlign) {
+ Type *Ty = Base->getType()->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+ BaseAlign = DL.getABITypeAlignment(Ty);
+ }
+
+ APInt Alignment(Offset.getBitWidth(), Align);
+
+ assert(Alignment.isPowerOf2() && "must be a power of 2!");
+ return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+ Type *Ty = Base->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(Base, Offset, Align, DL);
+}
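The two isAligned helpers reduce alignment checking to "the base's alignment covers Align, and the accumulated offset is a multiple of Align", where the power-of-two assumption turns the multiple test into a mask of the low bits. A tiny standalone sketch (plain uint64_t, made-up alignments):

#include <cassert>
#include <cstdint>

// Mirrors the new helpers: Align is a power of two, so "Offset is a
// multiple of Align" reduces to a mask test on the low bits.
static bool isAlignedAccess(uint64_t BaseAlign, uint64_t Offset,
                            uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "must be a power of 2!");
  return BaseAlign >= Align && (Offset & (Align - 1)) == 0;
}

int main() {
  assert(isAlignedAccess(16, 8, 8));  // 16-aligned base + 8 stays 8-aligned
  assert(!isAlignedAccess(16, 4, 8)); // offset 4 breaks 8-byte alignment
  assert(!isAlignedAccess(4, 0, 8));  // base alignment itself too small
  return 0;
}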
+
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
- const Instruction *CtxI,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- SmallPtrSetImpl<const Value *> &Visited) {
+static bool isDereferenceableAndAlignedPointer(
+ const Value *V, unsigned Align, const DataLayout &DL,
+ const Instruction *CtxI, const DominatorTree *DT,
+ const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- // These are obviously ok.
- if (isa<AllocaInst>(V)) return true;
+ // These are obviously ok if aligned.
+ if (isa<AllocaInst>(V))
+ return isAligned(V, Align, DL);
// It's not always safe to follow a bitcast, for example:
// bitcast i8* (alloca i8) to i32*
@@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
if (STy->isSized() && DTy->isSized() &&
(DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
(DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
- return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
}
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->hasExternalWeakLinkage();
+ if (!GV->hasExternalWeakLinkage())
+ return isAligned(V, Align, DL);
// byval arguments are okay.
if (const Argument *A = dyn_cast<Argument>(V))
if (A->hasByValAttr())
- return true;
-
+ return isAligned(V, Align, DL);
+
if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
- return true;
+ return isAligned(V, Align, DL);
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
Type *Ty = VTy->getPointerElementType();
const Value *Base = GEP->getPointerOperand();
- // Conservatively require that the base pointer be fully dereferenceable.
+ // Conservatively require that the base pointer be fully dereferenceable
+ // and aligned.
if (!Visited.insert(Base).second)
return false;
- if (!isDereferenceablePointer(Base, DL, CtxI,
- DT, TLI, Visited))
+ if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI,
+ Visited))
return false;
-
+
APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);
if (!GEP->accumulateConstantOffset(DL, Offset))
return false;
-
- // Check if the load is within the bounds of the underlying object.
+
+ // Check if the load is within the bounds of the underlying object
+ // and the offset is aligned.
uint64_t LoadSize = DL.getTypeStoreSize(Ty);
Type *BaseType = Base->getType()->getPointerElementType();
- return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType));
+ assert(isPowerOf2_32(Align) && "must be a power of 2!");
+ return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) &&
+ !(Offset & APInt(Offset.getBitWidth(), Align-1));
}
// For gc.relocate, look through relocations
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
GCRelocateOperands RelocateInst(I);
- return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(
+ RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
}
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
- return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
// If we don't know, assume the worst.
return false;
}
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
- const Instruction *CtxI,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI) {
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
Type *VTy = V->getType();
Type *Ty = VTy->getPointerElementType();
+
+ // Require ABI alignment for loads without an alignment specification.
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(Ty);
+
if (Ty->isSized()) {
APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
+
if (Offset.isNonNegative())
- if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
- CtxI, DT, TLI))
+ if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) &&
+ isAligned(BV, Offset, Align, DL))
return true;
}
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+ return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI,
+ Visited);
+}
+
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI);
}
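
[Editor's note] For context, the legacy entry point above is now just the Align == 1 case of the alignment-aware query. A hedged caller sketch follows; Ptr, LoadAlign, DL, CtxI, DT and TLI are assumed to be supplied by the surrounding pass.

// Alignment-aware form, as a speculating transform might issue it:
bool CanSpeculate =
    isDereferenceableAndAlignedPointer(Ptr, LoadAlign, DL, CtxI, DT, TLI);
// Dereferenceability only, i.e. the Align == 1 special case:
bool DerefOnly = isDereferenceablePointer(Ptr, DL, CtxI, DT, TLI);
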
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
@@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const LoadInst *LI = cast<LoadInst>(Inst);
if (!LI->isUnordered() ||
// Speculative load may create a race that did not exist in the source.
- LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
- return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
+ return isDereferenceableAndAlignedPointer(
+ LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::Switch:
case Instruction::Unreachable:
case Instruction::Fence:
- case Instruction::LandingPad:
case Instruction::AtomicRMW:
case Instruction::AtomicCmpXchg:
+ case Instruction::LandingPad:
case Instruction::Resume:
+ case Instruction::CatchSwitch:
+ case Instruction::CatchPad:
+ case Instruction::CatchRet:
+ case Instruction::CleanupPad:
+ case Instruction::CleanupRet:
return false; // Misc instructions which have effects
}
}
+bool llvm::mayBeMemoryDependent(const Instruction &I) {
+ return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+}
+
/// Return true if we know that the specified value is never null.
bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
- // Global values are not null unless extern weak.
+ // A global variable in address space 0 is non-null unless extern weak.
+ // Other address spaces may have null as a valid address for a global,
+ // so we can't assume anything.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage();
+ return !GV->hasExternalWeakLinkage() &&
+ GV->getType()->getAddressSpace() == 0;
// A Load tagged w/nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
@@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
unsigned NumUsesExplored = 0;
for (auto U : V->users()) {
// Avoid massive lists
@@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+static OverflowResult computeOverflowForSignedAdd(
+ Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+ if (Add && Add->hasNoSignedWrap()) {
+ return OverflowResult::NeverOverflows;
+ }
+
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if ((LHSKnownNonNegative && RHSKnownNegative) ||
+ (LHSKnownNegative && RHSKnownNonNegative)) {
+ // The sign bits are opposite: this CANNOT overflow.
+ return OverflowResult::NeverOverflows;
+ }
+
+ // The remaining code needs Add to be available. Return early if it is not.
+ if (!Add)
+ return OverflowResult::MayOverflow;
+
+ // If the sign of Add is the same as at least one of the operands, this add
+ // CANNOT overflow. This is particularly useful when the sum is
+ // @llvm.assume'ed non-negative rather than proved so from analyzing its
+ // operands.
+ bool LHSOrRHSKnownNonNegative =
+ (LHSKnownNonNegative || RHSKnownNonNegative);
+ bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+ if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
+ bool AddKnownNonNegative, AddKnownNegative;
+ ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
+ /*Depth=*/0, AC, CxtI, DT);
+ if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
+ (AddKnownNegative && LHSOrRHSKnownNegative)) {
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
+ Add, DL, AC, CxtI, DT);
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+}
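
[Editor's note] To make the opposite-signs case concrete: for 32-bit x >= 0 and y < 0, the exact sum satisfies y <= x + y <= x, so it stays within [INT32_MIN, INT32_MAX] and cannot wrap. A hedged caller sketch, with AddI assumed to be a BinaryOperator and DL, AC, DT taken from the surrounding analysis:

OverflowResult OR = computeOverflowForSignedAdd(
    AddI->getOperand(0), AddI->getOperand(1), DL, AC, AddI, DT);
if (OR == OverflowResult::NeverOverflows)
  AddI->setHasNoSignedWrap(true); // strengthen the IR with the proved fact
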
+
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+ // FIXME: This conservative implementation can be relaxed. E.g. most
+ // atomic operations are guaranteed to terminate on most platforms
+ // and most functions terminate.
+
+ return !I->isAtomic() && // atomics may never succeed on some platforms
+ !isa<CallInst>(I) && // could throw and might not terminate
+ !isa<InvokeInst>(I) && // might not terminate and could throw to
+ // non-successor (see bug 24185 for details).
+ !isa<ResumeInst>(I) && // has no successors
+ !isa<ReturnInst>(I); // has no successors
+}
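
[Editor's note] A minimal sketch built on the helper above, proving that a block always reaches its terminator once entered; the function name is hypothetical.

static bool blockAlwaysReachesTerminator(const BasicBlock &BB) {
  for (const Instruction &I : BB)
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return false; // e.g. a call that may throw or never return
  return true;
}
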
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L) {
+ // The loop header is guaranteed to be executed for every iteration.
+ //
+ // FIXME: Relax this constraint to cover all basic blocks that are
+ // guaranteed to be executed at every iteration.
+ if (I->getParent() != L->getHeader()) return false;
+
+ for (const Instruction &LI : *L->getHeader()) {
+ if (&LI == I) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+ }
+ llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
+bool llvm::propagatesFullPoison(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
+ return true;
+
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
+
+ case Instruction::Shl: {
+ // Left shift *by* a poison value is poison. The number of
+ // positions to shift is unsigned, so no negative values are
+ // possible there. Left shift by zero places preserves poison. So
+ // it only remains to consider left shift of poison by a positive
+ // number of places.
+ //
+ // A left shift by a positive number of places leaves the lowest order bit
+ // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+ // make the poison operand violate that flag, yielding a fresh full-poison
+ // value.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+
+ case Instruction::Mul: {
+ // A multiplication by zero yields a non-poison zero result, so we need to
+ // rule out zero as an operand. Conservatively, multiplication by a
+ // non-zero constant is not multiplication by zero.
+ //
+ // Multiplication by a non-zero constant can leave some bits
+ // non-poisoned. For example, a multiplication by 2 leaves the lowest
+ // order bit unpoisoned. So we need to consider that.
+ //
+ // Multiplication by 1 preserves poison. If the multiplication has a
+ // no-wrap flag, then we can make the poison operand violate that flag
+ // when multiplied by any integer other than 0 and 1.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+ for (Value *V : OBO->operands()) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ // A ConstantInt cannot yield poison, so we can assume that it is
+ // the other operand that is poison.
+ return !CI->isZero();
+ }
+ }
+ }
+ return false;
+ }
+
+ case Instruction::GetElementPtr:
+ // A GEP implicitly represents a sequence of additions, subtractions,
+ // truncations, sign extensions and multiplications. The multiplications
+ // are by the non-zero sizes of some set of types, so we do not have to be
+ // concerned with multiplication by zero. If the GEP is in-bounds, then
+ // these operations are implicitly no-signed-wrap so poison is propagated
+ // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+ return cast<GEPOperator>(I)->isInBounds();
+
+ default:
+ return false;
+ }
+}
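
[Editor's note] Hypothetical IR, in comments, illustrating the Mul case above; MulInst is an assumed handle to the second instruction.

//   %p = add nsw i32 %x, 1   ; may be poison if the add wraps
//   %m = mul nsw i32 %p, 2   ; propagates poison: a poison %p can be
//                            ; chosen so that the nsw flag is violated
//   %z = mul i32 %p, 0       ; folds to 0, so poison is not propagated
bool Propagates = propagatesFullPoison(MulInst); // true for %m, false for %z
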
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ return cast<StoreInst>(I)->getPointerOperand();
+
+ case Instruction::Load:
+ return cast<LoadInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return I->getOperand(1);
+
+ default:
+ return nullptr;
+ }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+ // We currently only look for uses of poison values within the same basic
+ // block, as that makes it easier to guarantee that the uses will be
+ // executed given that PoisonI is executed.
+ //
+ // FIXME: Expand this to consider uses beyond the same basic block. To do
+ // this, look out for the distinction between post-dominance and strong
+ // post-dominance.
+ const BasicBlock *BB = PoisonI->getParent();
+
+ // Set of instructions that we have proved will yield poison if PoisonI
+ // does.
+ SmallSet<const Value *, 16> YieldsPoison;
+ YieldsPoison.insert(PoisonI);
+
+ for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+ I != E; ++I) {
+ if (&*I != PoisonI) {
+ const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+ if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+ }
+
+ // Mark poison that propagates from I through uses of I.
+ if (YieldsPoison.count(&*I)) {
+ for (const User *User : I->users()) {
+ const Instruction *UserI = cast<Instruction>(User);
+ if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+ YieldsPoison.insert(User);
+ }
+ }
+ }
+ return false;
+}
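
[Editor's note] A hedged usage sketch of the query above; PoisonI is assumed to be, say, an add carrying nsw.

if (isKnownNotFullPoison(PoisonI)) {
  // If PoisonI produced full poison, a guaranteed-to-execute use in this
  // block would have undefined behavior; assuming the program is UB-free,
  // analyses may rely on PoisonI's nsw/nuw-style contract holding.
}
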
+
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+ if (FMF.noNaNs())
+ return true;
+
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isNaN();
+ return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isZero();
+ return false;
+}
+
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+ FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS) {
LHS = CmpLHS;
RHS = CmpRHS;
- // (icmp X, Y) ? X : Y
- if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
+ // return inconsistent results between implementations.
+ // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+ // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+ // Therefore we behave conservatively and only proceed if at least one of the
+ // operands is known to not be zero, or if we don't care about signed zeroes.
+ switch (Pred) {
+ default: break;
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+ if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+ !isKnownNonZero(CmpRHS))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ }
+
+ SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+ bool Ordered = false;
+
+ // When given one NaN and one non-NaN input:
+ // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+ // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+ // ordered comparison fails), which could be NaN or non-NaN.
+ // so here we discover exactly what NaN behavior is required/accepted.
+ if (CmpInst::isFPPredicate(Pred)) {
+ bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+ bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+ if (LHSSafe && RHSSafe) {
+ // Both operands are known non-NaN.
+ NaNBehavior = SPNB_RETURNS_ANY;
+ } else if (CmpInst::isOrdered(Pred)) {
+ // An ordered comparison will return false when given a NaN, so it
+ // returns the RHS.
+ Ordered = true;
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ } else {
+ Ordered = false;
+ // An unordered comparison will return true when given a NaN, so it
+ // returns the LHS.
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
}
}
- // (icmp X, Y) ? Y : X
if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ if (NaNBehavior == SPNB_RETURNS_NAN)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (NaNBehavior == SPNB_RETURNS_OTHER)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ Ordered = !Ordered;
+ }
+
+ // ([if]cmp X, Y) ? X : Y
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
+ default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
}
}
@@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
- return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+ return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
- return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+ return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
@@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
LHS = TrueVal;
RHS = FalseVal;
- return SPF_SMIN;
+ return {SPF_SMIN, SPNB_NA, false};
}
}
}
// TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
- return SPF_UNKNOWN;
+ return {SPF_UNKNOWN, SPNB_NA, false};
}
-static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
- Instruction::CastOps *CastOp) {
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+ Instruction::CastOps *CastOp) {
CastInst *CI = dyn_cast<CastInst>(V1);
Constant *C = dyn_cast<Constant>(V2);
- if (!CI || !C)
+ CastInst *CI2 = dyn_cast<CastInst>(V2);
+ if (!CI)
return nullptr;
*CastOp = CI->getOpcode();
+ if (CI2) {
+ // If V1 and V2 are both the same cast from the same type, we can look
+ // through V1.
+ if (CI2->getOpcode() == CI->getOpcode() &&
+ CI2->getSrcTy() == CI->getSrcTy())
+ return CI2->getOperand(0);
+ return nullptr;
+ } else if (!C) {
+ return nullptr;
+ }
+
if (isa<SExtInst>(CI) && CmpI->isSigned()) {
Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
// This is only valid if the truncated value can be sign-extended
@@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
if (isa<TruncInst>(CI))
return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+ if (isa<FPToUIInst>(CI))
+ return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<FPToSIInst>(CI))
+ return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<UIToFPInst>(CI))
+ return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+ if (isa<SIToFPInst>(CI))
+ return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+ if (isa<FPTruncInst>(CI))
+ return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+ if (isa<FPExtInst>(CI))
+ return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
return nullptr;
}
-SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+SelectPatternResult llvm::matchSelectPattern(Value *V,
Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp) {
SelectInst *SI = dyn_cast<SelectInst>(V);
- if (!SI) return SPF_UNKNOWN;
+ if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
- ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
- if (!CmpI) return SPF_UNKNOWN;
+ CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+ if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
- ICmpInst::Predicate Pred = CmpI->getPredicate();
+ CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(CmpI))
+ FMF = CmpI->getFastMathFlags();
// Bail out early.
if (CmpI->isEquality())
- return SPF_UNKNOWN;
+ return {SPF_UNKNOWN, SPNB_NA, false};
// Deal with type mismatches.
if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
- if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
cast<CastInst>(TrueVal)->getOperand(0), C,
LHS, RHS);
- if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
C, cast<CastInst>(FalseVal)->getOperand(0),
LHS, RHS);
}
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
LHS, RHS);
}
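
[Editor's note] A minimal caller sketch; Sel is an assumed select instruction, and the CastOp out-parameter is skipped by passing nullptr.

Value *LHS, *RHS;
SelectPatternResult SPR = matchSelectPattern(Sel, LHS, RHS, nullptr);
if (SPR.Flavor == SPF_FMINNUM && SPR.NaNBehavior == SPNB_RETURNS_ANY) {
  // Safe to treat the select as an fmin-like operation on LHS and RHS.
}
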
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+ const unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1 && "Must have at least one range!");
+ assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+ auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+ auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+ ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+ for (unsigned i = 1; i < NumRanges; ++i) {
+ auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+ // Note: unionWith will potentially create a range that contains values not
+ // contained in any of the original N ranges.
+ CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+ }
+
+ return CR;
+}
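
[Editor's note] A hedged sketch of a typical query; LI is assumed to be an integer load carrying !range metadata.

if (MDNode *Ranges = LI->getMetadata(LLVMContext::MD_range)) {
  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (!CR.contains(APInt(CR.getBitWidth(), 0))) {
    // The loaded value can be treated as known non-zero.
  }
}
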
+
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
+ if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+ return true;
+
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLE: {
+ const APInt *C;
+
+ // LHS s<= LHS +_{nsw} C if C >= 0
+ if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
+ return !C->isNegative();
+ return false;
+ }
+
+ case CmpInst::ICMP_ULE: {
+ const APInt *C;
+
+ // LHS u<= LHS +_{nuw} C for any C
+ if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+ return true;
+
+ // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
+ auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ const APInt *&CA, const APInt *&CB) {
+ if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
+ match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
+ return true;
+
+ // If X & C == 0 then (X | C) == X +_{nuw} C
+ if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
+ match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
+ unsigned BitWidth = CA->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+
+ if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *X;
+ const APInt *CLHS, *CRHS;
+ if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
+ return CLHS->ule(*CRHS);
+
+ return false;
+ }
+ }
+}
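
[Editor's note] Hypothetical IR, in comments, for the two ICMP_ULE patterns recognized above:

//   %a  = add nuw i32 %x, 5   ; %x u<= %a holds for any constant
//   %o1 = or i32 %x, 4        ; if bits 2 and 3 of %x are known zero,
//   %o2 = or i32 %x, 12       ; both ors behave as nuw adds, and
//                             ; 4 u<= 12 implies %o1 u<= %o2
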
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+ Value *ARHS, Value *BLHS, Value *BRHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+ }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() && "mismatched type");
+ Type *OpTy = LHS->getType();
+ assert(OpTy->getScalarType()->isIntegerTy(1));
+
+ // LHS ==> RHS by definition
+ if (LHS == RHS) return true;
+
+ if (OpTy->isVectorTy())
+ // TODO: extend the code below to handle vectors
+ return false;
+ assert(OpTy->isIntegerTy(1) && "implied by above");
+
+ ICmpInst::Predicate APred, BPred;
+ Value *ALHS, *ARHS;
+ Value *BLHS, *BRHS;
+
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+ !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ return false;
+
+ if (APred == BPred)
+ return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+ CxtI, DT);
+
+ return false;
+}
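
[Editor's note] A hedged caller sketch: proving one i1 value from a dominating one; BranchCond and LaterCond are assumed i1 values of matching type.

if (isImpliedCondition(BranchCond, LaterCond, DL, /*Depth=*/0,
                       /*AC=*/nullptr, /*CxtI=*/nullptr, /*DT=*/nullptr)) {
  // Wherever BranchCond is known true, LaterCond is also true.
}
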
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 8c671ef..4b244ec 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -11,13 +11,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
@@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
/// d) call should only read memory.
/// If all these conditions are met then return ValidIntrinsicID,
/// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
llvm::checkUnaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 1 ||
@@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I,
/// d) call should only read memory.
/// If all these conditions are met then return ValidIntrinsicID,
/// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
llvm::checkBinaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 2 ||
@@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,
/// \brief Returns intrinsic ID for call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if it does not find one, it returns not_intrinsic.
-llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
- const TargetLibraryInfo *TLI) {
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+ const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
Intrinsic::ID ID = II->getIntrinsicID();
@@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
- while (LastOperand > 1 &&
- match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
std::advance(GEPTI, LastOperand - 1);
@@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
/// \brief If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
-llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
- Loop *Lp) {
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
@@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
}
/// \brief If a value has only one user that is a CastInst, return it.
-llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
- llvm::Value *UniqueCast = nullptr;
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+ Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
@@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
-llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
- Loop *Lp) {
- const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
// Try to remove a gep instruction to make the pointer (actually the index
// at this point) easier to analyze. If OrigPtr is equal to Ptr we are
// analyzing the pointer, otherwise we are analyzing the index.
- llvm::Value *OrigPtr = Ptr;
+ Value *OrigPtr = Ptr;
// The size of the pointer access.
int64_t PtrAccessSize = 1;
@@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
- const APInt &APStepVal =
- cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+ const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
if (!U)
return nullptr;
- llvm::Value *Stride = U->getValue();
+ Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
return nullptr;
@@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
-llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
unsigned Width = VTy->getNumElements();
@@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
// Extract a value from a vector add operation with a constant zero.
Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V,
- llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
- llvm::PatternMatch::m_Constant(Con)))) {
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
if (Constant *Elt = Con->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
- }
// Otherwise, we don't know.
return nullptr;
}
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only two cases:
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcasts a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+ if (auto *C = dyn_cast<Constant>(V))
+ if (isa<VectorType>(V->getType()))
+ return C->getSplatValue();
+
+ auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+ if (!ShuffleInst)
+ return nullptr;
+ // All-zero (or undef) shuffle mask elements.
+ for (int MaskElt : ShuffleInst->getShuffleMask())
+ if (MaskElt != 0 && MaskElt != -1)
+ return nullptr;
+ // The first shuffle source is 'insertelement' with index 0.
+ auto *InsertEltInst =
+ dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+ if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+ !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+ return nullptr;
+
+ return InsertEltInst->getOperand(1);
+}
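
[Editor's note] The broadcast pattern of case (2), shown as hypothetical IR in comments:

//   %ins   = insertelement <4 x i32> undef, i32 %v, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
//                          <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %v; a constant splat such as
// <4 x i32> <i32 7, i32 7, i32 7, i32 7> is handled by case (1).
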
+
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+ const TargetTransformInfo *TTI) {
+
+ // DemandedBits will give us every value's live-out bits. But we want
+ // to ensure no extra casts would need to be inserted, so every DAG
+ // of connected values must have the same minimum bitwidth.
+ EquivalenceClasses<Value *> ECs;
+ SmallVector<Value *, 16> Worklist;
+ SmallPtrSet<Value *, 4> Roots;
+ SmallPtrSet<Value *, 16> Visited;
+ DenseMap<Value *, uint64_t> DBits;
+ SmallPtrSet<Instruction *, 4> InstructionSet;
+ MapVector<Instruction *, uint64_t> MinBWs;
+
+ // Determine the roots. We work bottom-up, from truncs or icmps.
+ bool SeenExtFromIllegalType = false;
+ for (auto *BB : Blocks)
+ for (auto &I : *BB) {
+ InstructionSet.insert(&I);
+
+ if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+ !TTI->isTypeLegal(I.getOperand(0)->getType()))
+ SeenExtFromIllegalType = true;
+
+ // Only deal with non-vector integers up to 64-bits wide.
+ if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+ !I.getType()->isVectorTy() &&
+ I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+ // Don't make work for ourselves. If we know the loaded type is legal,
+ // don't add it to the worklist.
+ if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+ continue;
+
+ Worklist.push_back(&I);
+ Roots.insert(&I);
+ }
+ }
+ // Early exit.
+ if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+ return MinBWs;
+
+ // Now proceed breadth-first, unioning values together.
+ while (!Worklist.empty()) {
+ Value *Val = Worklist.pop_back_val();
+ Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+ if (Visited.count(Val))
+ continue;
+ Visited.insert(Val);
+
+ // Non-instructions terminate a chain successfully.
+ if (!isa<Instruction>(Val))
+ continue;
+ Instruction *I = cast<Instruction>(Val);
+
+ // If we encounter a type that is larger than 64 bits, we can't represent
+ // it so bail out.
+ if (DB.getDemandedBits(I).getBitWidth() > 64)
+ return MapVector<Instruction *, uint64_t>();
+
+ uint64_t V = DB.getDemandedBits(I).getZExtValue();
+ DBits[Leader] |= V;
+
+ // Casts, loads and instructions outside of our range terminate a chain
+ // successfully.
+ if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+ !InstructionSet.count(I))
+ continue;
+
+ // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+ // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+ // transform anything that relies on them.
+ if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+ !I->getType()->isIntegerTy()) {
+ DBits[Leader] |= ~0ULL;
+ continue;
+ }
+
+ // We don't modify the types of PHIs. Reductions will already have been
+ // truncated if possible, and inductions' sizes will have been chosen by
+ // indvars.
+ if (isa<PHINode>(I))
+ continue;
+
+ if (DBits[Leader] == ~0ULL)
+ // All bits demanded, no point continuing.
+ continue;
+
+ for (Value *O : cast<User>(I)->operands()) {
+ ECs.unionSets(Leader, O);
+ Worklist.push_back(O);
+ }
+ }
+
+ // Now we've discovered all values, walk them to see if there are
+ // any users we didn't see. If there are, we can't optimize that
+ // chain.
+ for (auto &I : DBits)
+ for (auto *U : I.first->users())
+ if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+ DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+ for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+ uint64_t LeaderDemandedBits = 0;
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+ LeaderDemandedBits |= DBits[*MI];
+
+ uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+ llvm::countLeadingZeros(LeaderDemandedBits);
+ // Round up to a power of 2
+ if (!isPowerOf2_64((uint64_t)MinBW))
+ MinBW = NextPowerOf2(MinBW);
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+ if (!isa<Instruction>(*MI))
+ continue;
+ Type *Ty = (*MI)->getType();
+ if (Roots.count(*MI))
+ Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+ if (MinBW < Ty->getScalarSizeInBits())
+ MinBWs[cast<Instruction>(*MI)] = MinBW;
+ }
+ }
+
+ return MinBWs;
+}
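
[Editor's note] A hedged usage sketch, as a vectorizer might drive it; TheLoop, DB and TTI are assumed to come from the pass's analysis results.

SmallVector<BasicBlock *, 8> Blocks(TheLoop->block_begin(),
                                    TheLoop->block_end());
MapVector<Instruction *, uint64_t> MinBWs =
    computeMinimumValueSizes(Blocks, DB, TTI);
for (auto &KV : MinBWs) {
  // KV.first can be computed in KV.second bits without changing results.
}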