Diffstat (limited to 'contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp')
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp  227
1 file changed, 145 insertions(+), 82 deletions(-)
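
The change below threads an optional unsigned *Limit through getPointerDependencyFrom / getSimplePointerDependencyFrom and exposes getDefaultBlockScanLimit(), so a caller can share one instruction-scan budget across several queries instead of restarting at BlockScanLimit each time. A minimal caller sketch (not part of this commit) follows; the helper name and the loop in the usage note are hypothetical, only the signatures are taken from the diff itself:

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: query the dependency of a load while charging the
// instructions scanned against a budget owned by the caller.
static MemDepResult queryWithSharedBudget(MemoryDependenceResults &MD,
                                          LoadInst *LI, unsigned &Budget) {
  MemoryLocation Loc = MemoryLocation::get(LI);
  // Passing &Budget lets the scan decrement the caller's counter; passing
  // nullptr instead falls back to the analysis' default BlockScanLimit.
  return MD.getPointerDependencyFrom(Loc, /*isLoad=*/true, LI->getIterator(),
                                     LI->getParent(), LI, &Budget);
}

// Usage sketch: one budget shared by several loads scanned by the same client.
//   unsigned Budget = MD.getDefaultBlockScanLimit();
//   for (LoadInst *LI : Loads)
//     MemDepResult Dep = queryWithSharedBudget(MD, LI, Budget);
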
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3349933..66a0d14 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,24 +15,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
using namespace llvm;
#define DEBUG_TYPE "memdep"
@@ -166,7 +180,7 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
- // Walk backwards through the block, looking for dependencies
+ // Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
@@ -220,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-/// Return true if LI is a load that would fully overlap MemLoc if done as
-/// a wider legal integer load.
-///
-/// MemLocBase, MemLocOffset are lazily computed here the first time the
-/// base/offs of memloc is needed.
-static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
-
- // If we haven't already computed the base/offset of MemLoc, do so now.
- if (!MemLocBase)
- MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- MemLocBase, MemLocOffs, MemLoc.Size, LI);
- return Size != 0;
-}
-
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
const LoadInst *LI) {
@@ -292,7 +286,7 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
- while (1) {
+ while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
@@ -327,80 +321,129 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
- MemDepResult invariantGroupDependency =
- getInvariantGroupPointerDependency(LI, BB);
+ InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
- if (invariantGroupDependency.isDef())
- return invariantGroupDependency;
+ if (InvariantGroupDependency.isDef())
+ return InvariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+ MemDepResult SimpleDep = getSimplePointerDependencyFrom(
+ MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+ if (SimpleDep.isDef())
+ return SimpleDep;
+ // Non-local invariant group dependency indicates there is a non-local Def
+ // (it only returns NonLocal if it finds a non-local Def), which is better
+ // than a local clobber and everything else.
+ if (InvariantGroupDependency.isNonLocal())
+ return InvariantGroupDependency;
+
+ assert(InvariantGroupDependency.isUnknown() &&
+ "InvariantGroupDependency should be only unknown at this point");
+ return SimpleDep;
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
- BasicBlock *BB) {
- Value *LoadOperand = LI->getPointerOperand();
- // It's is not safe to walk the use list of global value, because function
- // passes aren't allowed to look outside their functions.
- if (isa<GlobalValue>(LoadOperand))
- return MemDepResult::getUnknown();
+ BasicBlock *BB) {
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
if (!InvariantGroupMD)
return MemDepResult::getUnknown();
- MemDepResult Result = MemDepResult::getUnknown();
- llvm::SmallSet<Value *, 14> Seen;
+ // Take the ptr operand after all casts and zero-index geps. This way we can
+ // search the cast graph downwards only.
+ Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
+
+ // It is not safe to walk the use list of a global value, because function
+ // passes aren't allowed to look outside their functions.
+ // FIXME: this could be fixed by filtering instructions from outside
+ // of the current function.
+ if (isa<GlobalValue>(LoadOperand))
+ return MemDepResult::getUnknown();
+
// Queue to process all pointers that are equivalent to load operand.
- llvm::SmallVector<Value *, 8> LoadOperandsQueue;
+ SmallVector<const Value *, 8> LoadOperandsQueue;
LoadOperandsQueue.push_back(LoadOperand);
- while (!LoadOperandsQueue.empty()) {
- Value *Ptr = LoadOperandsQueue.pop_back_val();
- if (isa<GlobalValue>(Ptr))
- continue;
- if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
- if (Seen.insert(BCI->getOperand(0)).second) {
- LoadOperandsQueue.push_back(BCI->getOperand(0));
- }
- }
+ Instruction *ClosestDependency = nullptr;
+ // The order of instructions in the uses list is unpredictable. In order to
+ // always get the same result, we will look for the closest dependency by
+ // dominance.
+ auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
+ assert(Other && "Must call it with not null instruction");
+ if (Best == nullptr || DT.dominates(Best, Other))
+ return Other;
+ return Best;
+ };
+
- for (Use &Us : Ptr->uses()) {
+ // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
+ // we will see all the instructions. This should be fixed in MSSA.
+ while (!LoadOperandsQueue.empty()) {
+ const Value *Ptr = LoadOperandsQueue.pop_back_val();
+ assert(Ptr && !isa<GlobalValue>(Ptr) &&
+ "Null or GlobalValue should not be inserted");
+
+ for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());
if (!U || U == LI || !DT.dominates(U, LI))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(U)) {
- if (Seen.insert(BCI).second) {
- LoadOperandsQueue.push_back(BCI);
- }
+ // A bitcast or a gep with all-zero indices uses Ptr. Add it to the queue
+ // to check its users, e.g. U = bitcast Ptr
+ if (isa<BitCastInst>(U)) {
+ LoadOperandsQueue.push_back(U);
continue;
}
+ // A gep with all-zero indices is equivalent to a bitcast.
+ // FIXME: we are not sure whether some bitcasts should be canonicalized to
+ // gep 0 or gep 0 to bitcast because of SROA, so there are 2 forms. When
+ // typeless pointers are ready, both cases will be gone
+ // (and this BFS also won't be needed).
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ if (GEP->hasAllZeroIndices()) {
+ LoadOperandsQueue.push_back(U);
+ continue;
+ }
+
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
- if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
- return MemDepResult::getDef(U);
+ ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
- return Result;
+
+ if (!ClosestDependency)
+ return MemDepResult::getUnknown();
+ if (ClosestDependency->getParent() == BB)
+ return MemDepResult::getDef(ClosestDependency);
+ // Def(U) can't be returned here because it is non-local. If no local
+ // dependency is found, return NonLocal, counting on the user calling
+ // getNonLocalPointerDependency, which will return the cached result.
+ NonLocalDefsCache.try_emplace(
+ LI, NonLocalDepResult(ClosestDependency->getParent(),
+ MemDepResult::getDef(ClosestDependency), nullptr));
+ return MemDepResult::getNonLocal();
}
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
-
- const Value *MemLocBase = nullptr;
- int64_t MemLocOffset = 0;
- unsigned Limit = BlockScanLimit;
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
bool isInvariantLoad = false;
+ if (!Limit) {
+ unsigned DefaultLimit = BlockScanLimit;
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ &DefaultLimit);
+ }
+
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
// QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -474,8 +517,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
- --Limit;
- if (!Limit)
+ --*Limit;
+ if (!*Limit)
return MemDepResult::getUnknown();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -530,21 +573,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
AliasResult R = AA.alias(LoadLoc, MemLoc);
if (isLoad) {
- if (R == NoAlias) {
- // If this is an over-aligned integer load (for example,
- // "load i8* %P, align 4") see if it would obviously overlap with the
- // queried location if widened to a larger load (e.g. if the queried
- // location is 1 byte at P+1). If so, return it as a load/load
- // clobber result, allowing the client to decide to widen the load if
- // it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
- if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
- isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI))
- return MemDepResult::getClobber(Inst);
- }
+ if (R == NoAlias)
continue;
- }
// Must aliased loads are defs of each other.
if (R == MustAlias)
@@ -697,7 +727,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is
+ // No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
@@ -709,7 +739,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & MRI_Mod);
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(
@@ -884,7 +914,17 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+ {
+ // Check if there is a cached Def with invariant.group. FIXME: the cache
+ // might be invalid if the cached instruction is removed between the call to
+ // getPointerDependencyFrom and this function.
+ auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
+ if (NonLocalDefIt != NonLocalDefsCache.end()) {
+ Result.push_back(std::move(NonLocalDefIt->second));
+ NonLocalDefsCache.erase(NonLocalDefIt);
+ return;
+ }
+ }
// This routine does not expect to deal with volatile instructions.
// Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
@@ -1010,7 +1050,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
}
case 1:
// One new entry, Just insert the new value at the appropriate position.
@@ -1659,10 +1699,10 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#endif
}
-char MemoryDependenceAnalysis::PassID;
+AnalysisKey MemoryDependenceAnalysis::Key;
MemoryDependenceResults
-MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1684,6 +1724,7 @@ INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}
+
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
void MemoryDependenceWrapperPass::releaseMemory() {
@@ -1698,6 +1739,28 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check whether our analysis is preserved.
+ auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ // If not, give up now.
+ return true;
+
+ // Check whether the analyses we depend on became invalid for any reason.
+ if (Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
+unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
+ return BlockScanLimit;
+}
+
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);