summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp447
1 files changed, 333 insertions, 114 deletions
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b11cd7e..becbae4 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+/// \brief The maximum iterations used to merge memory checks
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+ "memory-check-merge-threshold", cl::Hidden,
+ cl::desc("Maximum number of comparisons done when trying to merge "
+ "runtime memory checks. (default = 100)"),
+ cl::init(100));
+
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
@@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
return SE->getSCEV(Ptr);
}
-void LoopAccessInfo::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides) {
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
+ Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
+ Sc);
+}
+
+bool RuntimePointerChecking::needsChecking(
+ const CheckingPtrGroup &M, const CheckingPtrGroup &N,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+ for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+ if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ return true;
+ return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr in case we couldn't find an answer.
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+ ScalarEvolution *SE) {
+ const SCEV *Diff = SE->getMinusSCEV(J, I);
+ const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
+
+ if (!C)
+ return nullptr;
+ if (C->getValue()->isNegative())
+ return J;
+ return I;
+}
+
+bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
+ const SCEV *Start = RtCheck.Pointers[Index].Start;
+ const SCEV *End = RtCheck.Pointers[Index].End;
+
+ // Compare the starts and ends with the known minimum and maximum
+ // of this set. We need to know how we compare against the min/max
+ // of the set in order to be able to emit memchecks.
+ const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
+ if (!Min0)
+ return false;
+
+ const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
+ if (!Min1)
+ return false;
+
+ // Update the low bound expression if we've found a new min value.
+ if (Min0 == Start)
+ Low = Start;
+
+ // Update the high bound expression if we've found a new max value.
+ if (Min1 != End)
+ High = End;
+
+ Members.push_back(Index);
+ return true;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+void RuntimePointerChecking::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ // We build the groups from dependency candidates equivalence classes
+ // because:
+ // - We know that pointers in the same equivalence class share
+ // the same underlying object and therefore there is a chance
+ // that we can compare pointers
+ // - We wouldn't be able to merge two pointers for which we need
+ // to emit a memcheck. The classes in DepCands are already
+ // conveniently built such that no two pointers in the same
+ // class need checking against each other.
+
+ // We use the following (greedy) algorithm to construct the groups
+ // For every pointer in the equivalence class:
+ // For each existing group:
+ // - if the difference between this pointer and the min/max bounds
+ // of the group is a constant, then make the pointer part of the
+ // group and update the min/max bounds of that group as required.
+
+ CheckingGroups.clear();
+
+ // If we don't have the dependency partitions, construct a new
+ // checking pointer group for each pointer.
+ if (!UseDependencies) {
+ for (unsigned I = 0; I < Pointers.size(); ++I)
+ CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+ return;
+ }
+
+ unsigned TotalComparisons = 0;
+
+ DenseMap<Value *, unsigned> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index)
+ PositionMap[Pointers[Index].PointerValue] = Index;
+
+ // We need to keep track of what pointers we've already seen so we
+ // don't process them twice.
+ SmallSet<unsigned, 2> Seen;
+
+ // Go through all equivalence classes, get the the "pointer check groups"
+ // and add them to the overall solution. We use the order in which accesses
+ // appear in 'Pointers' to enforce determinism.
+ for (unsigned I = 0; I < Pointers.size(); ++I) {
+ // We've seen this pointer before, and therefore already processed
+ // its equivalence class.
+ if (Seen.count(I))
+ continue;
+
+ MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
+ Pointers[I].IsWritePtr);
+
+ SmallVector<CheckingPtrGroup, 2> Groups;
+ auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
+
+ // Because DepCands is constructed by visiting accesses in the order in
+ // which they appear in alias sets (which is deterministic) and the
+ // iteration order within an equivalence class member is only dependent on
+ // the order in which unions and insertions are performed on the
+ // equivalence class, the iteration order is deterministic.
+ for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+ unsigned Pointer = PositionMap[MI->getPointer()];
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (CheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer)) {
+ Merged = true;
+ break;
+ }
+ }
+
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(CheckingPtrGroup(Pointer, *this));
+ }
+
+ // We've computed the grouped checks for this partition.
+ // Save the results and continue with the next one.
+ std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ }
+}
+
+bool RuntimePointerChecking::needsChecking(
unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+ const PointerInfo &PointerI = Pointers[I];
+ const PointerInfo &PointerJ = Pointers[J];
+
// No need to check if two readonly pointers intersect.
- if (!IsWritePtr[I] && !IsWritePtr[J])
+ if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
return false;
// Only need to check pointers between two different dependency sets.
- if (DependencySetId[I] == DependencySetId[J])
+ if (PointerI.DependencySetId == PointerJ.DependencySetId)
return false;
// Only need to check pointers in the same alias set.
- if (AliasSetId[I] != AliasSetId[J])
+ if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
// If PtrPartition is set omit checks between pointers of the same partition.
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
return true;
}
-void LoopAccessInfo::RuntimePointerCheck::print(
+void RuntimePointerChecking::print(
raw_ostream &OS, unsigned Depth,
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
- if (NumPointers == 0)
- return;
OS.indent(Depth) << "Run-time memory checks:\n";
+
unsigned N = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition)) {
- OS.indent(Depth) << N++ << ":\n";
- OS.indent(Depth + 2) << *Pointers[I];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[I] << ")";
- OS << "\n";
- OS.indent(Depth + 2) << *Pointers[J];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[J] << ")";
- OS << "\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I)
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
+ OS.indent(Depth) << "Check " << N++ << ":\n";
+ OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
+ << "\n";
+ }
+
+ OS.indent(Depth + 2) << "Against group " << J << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
+ << "\n";
+ }
}
+
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ OS.indent(Depth + 2) << "Group " << I << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
+ << " High: " << *CheckingGroups[I].High << ")\n";
+ for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: "
+ << *Pointers[CheckingGroups[I].Members[J]].Expr
+ << "\n";
+ }
+ }
}
-unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
+unsigned RuntimePointerChecking::getNumberOfChecks(
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+
+ unsigned NumPartitions = CheckingGroups.size();
unsigned CheckCount = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
+ for (unsigned I = 0; I < NumPartitions; ++I)
+ for (unsigned J = I + 1; J < NumPartitions; ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
CheckCount++;
return CheckCount;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+bool RuntimePointerChecking::needsAnyChecking(
const SmallVectorImpl<int> *PtrPartition) const {
- return getNumberOfChecks(PtrPartition) != 0;
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
}
namespace {
@@ -207,7 +391,8 @@ public:
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
+ IsRTCheckAnalysisNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -226,11 +411,12 @@ public:
}
/// \brief Check whether we can check the pointers at runtime for
- /// non-intersection. Returns true when we have 0 pointers
- /// (a check on 0 pointers for non-intersection will always return true).
- bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
- bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
- const ValueToValueMap &Strides,
+ /// non-intersection.
+ ///
+ /// Returns true if we need no check or if we do and we can generate them
+ /// (i.e. the pointers have computable bounds).
+ bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
@@ -239,8 +425,11 @@ public:
processMemAccesses();
}
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
+ /// \brief Initial processing of memory accesses determined that we need to
+ /// perform dependency checking.
+ ///
+ /// Note that this can later be cleared if we retry memcheck analysis without
+ /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -255,7 +444,7 @@ private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
/// \brief Go over all memory access and check whether runtime pointer checks
- /// are needed /// and build sets of dependency check candidates.
+ /// are needed and build sets of dependency check candidates.
void processMemAccesses();
/// Set of all accesses.
@@ -280,7 +469,14 @@ private:
/// dependence check.
MemoryDepChecker::DepCandidates &DepCands;
- bool IsRTCheckNeeded;
+ /// \brief Initial processing of memory accesses determined that we may need
+ /// to add memchecks. Perform the analysis to determine the necessary checks.
+ ///
+ /// Note that, this is different from isDependencyCheckNeeded. When we retry
+ /// memcheck analysis without dependency checking
+ /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
+ /// while this remains set if we have potentially dependent accesses.
+ bool IsRTCheckAnalysisNeeded;
};
} // end anonymous namespace
@@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE,
return AR->isAffine();
}
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
- ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
- bool ShouldCheckStride) {
+bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
+ ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
- NeedRTCheck = false;
- if (!IsRTCheckNeeded) return true;
+ bool NeedRTCheck = false;
+ if (!IsRTCheckAnalysisNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
@@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Accesses between different groups doesn't need to be checked.
unsigned ASId = 1;
for (auto &AS : AST) {
+ int NumReadPtrChecks = 0;
+ int NumWritePtrChecks = 0;
+
// We assign consecutive id to access from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
@@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT(
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
MemAccessInfo Access(Ptr, IsWrite);
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
if (hasComputableBounds(SE, StridesMap, Ptr) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
@@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
+ // If we have at least two writes or one write and a read then we need to
+ // check them. But there is no need to checks if there is only one
+ // dependence set for this alias set.
+ //
+ // Note that this function computes CanDoRT and NeedRTCheck independently.
+ // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
+ // for which we couldn't find the bounds but we don't actually need to emit
+ // any checks so it does not matter.
+ if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
+ NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
+ NumWritePtrChecks >= 1));
+
++ASId;
}
- // We need a runtime check if there are any accesses that need checking.
- // However, some accesses cannot be checked (for example because we
- // can't determine their bounds). In these cases we would need a check
- // but wouldn't be able to add it.
- NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr);
-
// If the pointers that we would use for the bounds comparison have different
// address spaces, assume the values aren't directly comparable, so we can't
// use them for the runtime check. We also have to assume they could
@@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT(
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
// Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ if (RtCheck.Pointers[i].DependencySetId ==
+ RtCheck.Pointers[j].DependencySetId)
continue;
// Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
continue;
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
+ Value *PtrI = RtCheck.Pointers[i].PointerValue;
+ Value *PtrJ = RtCheck.Pointers[j].PointerValue;
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
@@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
- return CanDoRT;
+ if (NeedRTCheck && CanDoRT)
+ RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ << " pointer comparisons.\n");
+
+ RtCheck.Need = NeedRTCheck;
+
+ bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
+ if (!CanDoRTIfNeeded)
+ RtCheck.reset();
+ return CanDoRTIfNeeded;
}
void AccessAnalysis::processMemAccesses() {
@@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() {
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
+ IsRTCheckAnalysisNeeded = true;
}
if (IsWrite)
@@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
// Check the step is constant.
const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
+ // Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
@@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
- // Need consecutive accesses. We don't want to vectorize
+ // Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
- DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return Dependence::Unknown;
}
@@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned Stride = std::abs(StrideAPtr);
if (Stride > 1 &&
- areStridedAccessesIndependent(Distance, Stride, TypeByteSize))
+ areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
+ DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
+ }
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtChecking.Pointers.clear();
+ PtrRtChecking.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
@@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool NeedRTCheck;
- bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
- NeedRTCheck, SE,
- TheLoop, Strides);
-
- DEBUG(dbgs() << "LAA: We need to do "
- << PtrRtCheck.getNumberOfChecks(nullptr)
- << " pointer comparisons.\n");
-
- // Check that we found the bounds for the pointer.
- if (CanDoRT)
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
- else if (NeedRTCheck) {
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
CanVecMem = false;
return;
}
- PtrRtCheck.Need = NeedRTCheck;
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
@@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
- NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(DepChecker);
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
+ PtrRtChecking.reset();
+ PtrRtChecking.Need = true;
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
- TheLoop, Strides, true);
+ CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
// Check that we found the bounds for the pointer.
- if (NeedRTCheck && !CanDoRT) {
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport()
<< "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
CanVecMem = false;
return;
}
@@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (CanVecMem)
DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
- << (NeedRTCheck ? "" : " don't")
- << " need a runtime memory check.\n");
+ << (PtrRtChecking.Need ? "" : " don't")
+ << " need runtime memory checks.\n");
else {
emitAnalysis(LoopAccessReport() <<
"unsafe dependent memory operations in loop");
@@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtCheck.Need)
+ if (!PtrRtChecking.Need)
return std::make_pair(nullptr, nullptr);
- unsigned NumPointers = PtrRtCheck.Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
+ SmallVector<TrackingVH<Value>, 2> Starts;
+ SmallVector<TrackingVH<Value>, 2> Ends;
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, DL, "induction");
Instruction *FirstInst = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck.Pointers[i];
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ const RuntimePointerChecking::CheckingPtrGroup &CG =
+ PtrRtChecking.CheckingGroups[i];
+ Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
- Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
Starts.push_back(Start);
Ends.push_back(End);
}
@@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI =
+ PtrRtChecking.CheckingGroups[i];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ =
+ PtrRtChecking.CheckingGroups[j];
+
+ if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
@@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.Need)
+ if (PtrRtChecking.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
else
OS.indent(Depth) << "Memory dependences are safe\n";
@@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
// List the pair of accesses need run-time checks to prove independence.
- PtrRtCheck.print(OS, Depth);
+ PtrRtChecking.print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Store to invariant address was "
OpenPOWER on IntegriCloud