Diffstat (limited to 'contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 608
1 file changed, 416 insertions, 192 deletions
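The most visible behavioral change in this diff is the runtime pointer check: each pointer is summarized as a byte interval [Start, End), where End now includes the size of the accessed element, and the emitted comparison is tightened from ULE to ULT so that adjacent but non-overlapping intervals no longer report a conflict. The sketch below restates that check with plain integers instead of SCEV expressions; the struct, the function names, and the main() driver are illustrative and not part of the patch.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative model of the runtime bound check documented in the patch:
// each pointer is reduced to a half-open byte interval [Start, End), where
// End is the last accessed byte plus one, and two accesses need no memory
// check when the intervals are disjoint.
struct AccessInterval {
  uint64_t Start; // first accessed byte
  uint64_t End;   // one past the last accessed byte
};

// Build the interval for an access whose address on iteration i is
// Base + i*Step; the last address is Base + Step*N, where N is the
// back-edge-taken count, matching the formula quoted in the patch.
static AccessInterval makeInterval(uint64_t Base, int64_t Step, uint64_t N,
                                   uint64_t EltSize) {
  uint64_t Last = Base + Step * static_cast<int64_t>(N);
  uint64_t Lo = std::min(Base, Last);           // UMIN(A, B)
  uint64_t Hi = std::max(Base, Last) + EltSize; // UMAX(A, B) + SizeOfElt
  return {Lo, Hi};
}

// NoConflict = (B.Start >= A.End) || (A.Start >= B.End); the IR emitted by
// addRuntimeChecks uses the equivalent negation
// IsConflict = (B.Start < A.End) && (A.Start < B.End).
static bool mayConflict(const AccessInterval &A, const AccessInterval &B) {
  return B.Start < A.End && A.Start < B.End;
}

int main() {
  // A[i] and B[i] over eight iterations of 4-byte elements, with B starting
  // 32 bytes after A: the intervals come out as [0, 32) and [32, 64).
  AccessInterval A = makeInterval(/*Base=*/0, /*Step=*/4, /*N=*/7, /*EltSize=*/4);
  AccessInterval B = makeInterval(/*Base=*/32, /*Step=*/4, /*N=*/7, /*EltSize=*/4);
  // The intervals touch at byte 32 but do not overlap, so with the strict
  // (ULT) comparison no runtime conflict is reported.
  std::printf("conflict: %d\n", mayConflict(A, B)); // prints 0
  return 0;
}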
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 8bcdcb8..5214eb7 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -14,15 +14,17 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/VectorUtils.h" using namespace llvm; #define DEBUG_TYPE "loop-accesses" @@ -65,6 +67,28 @@ static cl::opt<unsigned> "loop-access analysis (default = 100)"), cl::init(100)); +/// This enables versioning on the strides of symbolically striding memory +/// accesses in code like the following. +/// for (i = 0; i < N; ++i) +/// A[i * Stride1] += B[i * Stride2] ... +/// +/// Will be roughly translated to +/// if (Stride1 == 1 && Stride2 == 1) { +/// for (i = 0; i < N; i+=4) +/// A[i:i+3] += ... +/// } else +/// ... +static cl::opt<bool> EnableMemAccessVersioning( + "enable-mem-access-versioning", cl::init(true), cl::Hidden, + cl::desc("Enable symbolic stride memory access versioning")); + +/// \brief Enable store-to-load forwarding conflict detection. This option can +/// be disabled for correctness testing. +static cl::opt<bool> EnableForwardingConflictDetection( + "store-to-load-forwarding-conflict-detection", cl::Hidden, + cl::desc("Enable conflict detection in loop-access analysis"), + cl::init(true)); + bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -81,7 +105,7 @@ void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, } Value *llvm::stripIntegerCast(Value *V) { - if (CastInst *CI = dyn_cast<CastInst>(V)) + if (auto *CI = dyn_cast<CastInst>(V)) if (CI->getOperand(0)->getType()->isIntegerTy()) return CI->getOperand(0); return V; @@ -124,32 +148,58 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, return OrigSCEV; } +/// Calculate Start and End points of memory access. +/// Let's assume A is the first access and B is a memory access on N-th loop +/// iteration. Then B is calculated as: +/// B = A + Step*N . +/// Step value may be positive or negative. +/// N is a calculated back-edge taken count: +/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 +/// Start and End points are calculated in the following way: +/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, +/// where SizeOfElt is the size of single memory access in bytes. +/// +/// There is no conflict when the intervals are disjoint: +/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE) { // Get the stride replaced scev. 
const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); - assert(AR && "Invalid addrec expression"); ScalarEvolution *SE = PSE.getSE(); - const SCEV *Ex = SE->getBackedgeTakenCount(Lp); - const SCEV *ScStart = AR->getStart(); - const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *ScStart; + const SCEV *ScEnd; - // For expressions with negative step, the upper bound is ScStart and the - // lower bound is ScEnd. - if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) { - if (CStep->getValue()->isNegative()) - std::swap(ScStart, ScEnd); - } else { - // Fallback case: the step is not constant, but the we can still - // get the upper and lower bounds of the interval by using min/max - // expressions. - ScStart = SE->getUMinExpr(ScStart, ScEnd); - ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); + if (SE->isLoopInvariant(Sc, Lp)) + ScStart = ScEnd = Sc; + else { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); + assert(AR && "Invalid addrec expression"); + const SCEV *Ex = PSE.getBackedgeTakenCount(); + + ScStart = AR->getStart(); + ScEnd = AR->evaluateAtIteration(Ex, *SE); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // For expressions with negative step, the upper bound is ScStart and the + // lower bound is ScEnd. + if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) { + if (CStep->getValue()->isNegative()) + std::swap(ScStart, ScEnd); + } else { + // Fallback case: the step is not constant, but we can still + // get the upper and lower bounds of the interval by using min/max + // expressions. + ScStart = SE->getUMinExpr(ScStart, ScEnd); + ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); + } + // Add the size of the pointed element to ScEnd. + unsigned EltSize = + Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8; + const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize); + ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); @@ -452,7 +502,7 @@ public: /// (i.e. the pointers have computable bounds). bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &Strides, - bool ShouldCheckStride = false); + bool ShouldCheckWrap = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. @@ -524,6 +574,11 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L) { const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); + + // The bounds for loop-invariant pointer is trivial. + if (PSE.getSE()->isLoopInvariant(PtrScev, L)) + return true; + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); if (!AR) return false; @@ -531,10 +586,21 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, return AR->isAffine(); } +/// \brief Check whether a pointer address cannot wrap. 
+static bool isNoWrap(PredicatedScalarEvolution &PSE, + const ValueToValueMap &Strides, Value *Ptr, Loop *L) { + const SCEV *PtrScev = PSE.getSCEV(Ptr); + if (PSE.getSE()->isLoopInvariant(PtrScev, L)) + return true; + + int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); + return Stride == 1; +} + bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckStride) { + bool ShouldCheckWrap) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; @@ -569,8 +635,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. - (!ShouldCheckStride || - isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { + (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) { // The id of the dependence set. unsigned DepId; @@ -773,7 +838,7 @@ static bool isInBoundsGep(Value *Ptr) { /// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, /// i.e. monotonically increasing/decreasing. static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, - ScalarEvolution *SE, const Loop *L) { + PredicatedScalarEvolution &PSE, const Loop *L) { // FIXME: This should probably only return true for NUW. if (AR->getNoWrapFlags(SCEV::NoWrapMask)) return true; @@ -792,11 +857,11 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, // Make sure there is only one non-const index and analyze that. Value *NonConstIndex = nullptr; - for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) - if (!isa<ConstantInt>(*Index)) { + for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end())) + if (!isa<ConstantInt>(Index)) { if (NonConstIndex) return false; - NonConstIndex = *Index; + NonConstIndex = Index; } if (!NonConstIndex) // The recurrence is on the pointer, ignore for now. @@ -809,7 +874,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, // Assume constant for other the operand so that the AddRec can be // easily found. isa<ConstantInt>(OBO->getOperand(1))) { - auto *OpScev = SE->getSCEV(OBO->getOperand(0)); + auto *OpScev = PSE.getSCEV(OBO->getOperand(0)); if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev)) return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW); @@ -819,32 +884,36 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, - const Loop *Lp, const ValueToValueMap &StridesMap) { +int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap, + bool Assume) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. 
auto *PtrTy = cast<PointerType>(Ty); if (PtrTy->getElementType()->isAggregateType()) { - DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" - << *Ptr << "\n"); + DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr + << "\n"); return 0; } const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); + if (Assume && !AR) + AR = PSE.getAsAddRec(Ptr); + if (!AR) { - DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " - << *Ptr << " SCEV: " << *PtrScev << "\n"); + DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr + << " SCEV: " << *PtrScev << "\n"); return 0; } // The accesss function must stride over the innermost loop. if (Lp != AR->getLoop()) { DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << - *Ptr << " SCEV: " << *PtrScev << "\n"); + *Ptr << " SCEV: " << *AR << "\n"); return 0; } @@ -856,12 +925,23 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp); + bool IsNoWrapAddRec = + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { - DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *PtrScev << "\n"); - return 0; + if (Assume) { + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + IsNoWrapAddRec = true; + DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" + << "LAA: Pointer: " << *Ptr << "\n" + << "LAA: SCEV: " << *AR << "\n" + << "LAA: Added an overflow assumption\n"); + } else { + DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " + << *Ptr << " SCEV: " << *AR << "\n"); + return 0; + } } // Check the step is constant. @@ -871,7 +951,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) { DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << - " SCEV: " << *PtrScev << "\n"); + " SCEV: " << *AR << "\n"); return 0; } @@ -895,12 +975,94 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, // know we can't "wrap around the address space". In case of address space // zero we know that this won't happen without triggering undefined behavior. if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) && - Stride != 1 && Stride != -1) - return 0; + Stride != 1 && Stride != -1) { + if (Assume) { + // We can avoid this case by adding a run-time check. + DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " + << "inbouds or in address space 0 may wrap:\n" + << "LAA: Pointer: " << *Ptr << "\n" + << "LAA: SCEV: " << *AR << "\n" + << "LAA: Added an overflow assumption\n"); + PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + } else + return 0; + } return Stride; } +/// Take the pointer operand from the Load/Store instruction. +/// Returns NULL if this is not a valid Load/Store instruction. 
+static Value *getPointerOperand(Value *I) { + if (auto *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerOperand(); + if (auto *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + return nullptr; +} + +/// Take the address space operand from the Load/Store instruction. +/// Returns -1 if this is not a valid Load/Store instruction. +static unsigned getAddressSpaceOperand(Value *I) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) + return L->getPointerAddressSpace(); + if (StoreInst *S = dyn_cast<StoreInst>(I)) + return S->getPointerAddressSpace(); + return -1; +} + +/// Returns true if the memory operations \p A and \p B are consecutive. +bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, + ScalarEvolution &SE, bool CheckType) { + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + // Check that the address spaces match and that the pointers are valid. + if (!PtrA || !PtrB || (ASA != ASB)) + return false; + + // Make sure that A and B are different pointers. + if (PtrA == PtrB) + return false; + + // Make sure that A and B have the same type if required. + if(CheckType && PtrA->getType() != PtrB->getType()) + return false; + + unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); + Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); + APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); + + APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); + PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + + // OffsetDelta = OffsetB - OffsetA; + const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); + const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); + const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); + const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV); + const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); + // Check if they are based on the same pointer. That makes the offsets + // sufficient. + if (PtrA == PtrB) + return OffsetDelta == Size; + + // Compute the necessary base pointer delta to have the necessary final delta + // equal to the size. + // BaseDelta = Size - OffsetDelta; + const SCEV *SizeSCEV = SE.getConstant(Size); + const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV); + + // Otherwise compute the distance with SCEV between the base pointers. + const SCEV *PtrSCEVA = SE.getSCEV(PtrA); + const SCEV *PtrSCEVB = SE.getSCEV(PtrB); + const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); + return X == PtrSCEVB; +} + bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { switch (Type) { case NoDep: @@ -953,8 +1115,8 @@ bool MemoryDepChecker::Dependence::isForward() const { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, - unsigned TypeByteSize) { +bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, + uint64_t TypeByteSize) { // If loads occur at a distance that is not a multiple of a feasible vector // factor store-load forwarding does not take place. // Positive dependences might cause troubles because vectorizing them might @@ -964,30 +1126,34 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, // hence on your typical architecture store-load forwarding does not take // place. Vectorizing in such cases does not make sense. // Store-load forwarding distance. 
- const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize; + + // After this many iterations store-to-load forwarding conflicts should not + // cause any slowdowns. + const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. - unsigned MaxVFWithoutSLForwardIssues = - VectorizerParams::MaxVectorWidth * TypeByteSize; - if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues) - MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes; - - for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues; - vf *= 2) { - if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) { - MaxVFWithoutSLForwardIssues = (vf >>=1); + uint64_t MaxVFWithoutSLForwardIssues = std::min( + VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes); + + // Compute the smallest VF at which the store and load would be misaligned. + for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; + VF *= 2) { + // If the number of vector iteration between the store and the load are + // small we could incur conflicts. + if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { + MaxVFWithoutSLForwardIssues = (VF >>= 1); break; } } - if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) { - DEBUG(dbgs() << "LAA: Distance " << Distance << - " that could cause a store-load forwarding conflict\n"); + if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { + DEBUG(dbgs() << "LAA: Distance " << Distance + << " that could cause a store-load forwarding conflict\n"); return true; } if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && MaxVFWithoutSLForwardIssues != - VectorizerParams::MaxVectorWidth * TypeByteSize) + VectorizerParams::MaxVectorWidth * TypeByteSize) MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; return false; } @@ -997,8 +1163,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, /// bytes. /// /// \returns true if they are independent. -static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, - unsigned TypeByteSize) { +static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, + uint64_t TypeByteSize) { assert(Stride > 1 && "The stride must be greater than 1"); assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); assert(Distance > 0 && "The distance must be non-zero"); @@ -1007,7 +1173,7 @@ static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, if (Distance % TypeByteSize) return false; - unsigned ScaledDist = Distance / TypeByteSize; + uint64_t ScaledDist = Distance / TypeByteSize; // No dependence if the scaled distance is not multiple of the stride. // E.g. @@ -1048,20 +1214,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - - int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); + int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); + int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); - const SCEV *Src = AScev; - const SCEV *Sink = BScev; + const SCEV *Src = PSE.getSCEV(APtr); + const SCEV *Sink = PSE.getSCEV(BPtr); // If the induction step is negative we have to invert source and sink of the // dependence. 
if (StrideAPtr < 0) { - //Src = BScev; - //Sink = AScev; std::swap(APtr, BPtr); std::swap(Src, Sink); std::swap(AIsWrite, BIsWrite); @@ -1094,18 +1255,30 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, Type *ATy = APtr->getType()->getPointerElementType(); Type *BTy = BPtr->getType()->getPointerElementType(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); - unsigned TypeByteSize = DL.getTypeAllocSize(ATy); + uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); - // Negative distances are not plausible dependencies. const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + uint64_t Stride = std::abs(StrideAPtr); + + // Attempt to prove strided accesses independent. + if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && + areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { + DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + return Dependence::NoDep; + } + + // Negative distances are not plausible dependencies. if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); - if (IsTrueDataDependence && + if (IsTrueDataDependence && EnableForwardingConflictDetection && (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || - ATy != BTy)) + ATy != BTy)) { + DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; + } - DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n"); + DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } @@ -1126,15 +1299,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return Dependence::Unknown; } - unsigned Distance = (unsigned) Val.getZExtValue(); - - unsigned Stride = std::abs(StrideAPtr); - if (Stride > 1 && - areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) { - DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); - return Dependence::NoDep; - } - // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); @@ -1169,9 +1333,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. - unsigned MinDistanceNeeded = + uint64_t MinDistanceNeeded = TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; - if (MinDistanceNeeded > Distance) { + if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); return Dependence::Backward; @@ -1201,10 +1365,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // is 8, which is less than 2 and forbidden vectorization, But actually // both A and B could be vectorized by 2 iterations. MaxSafeDepDistBytes = - Distance < MaxSafeDepDistBytes ? 
Distance : MaxSafeDepDistBytes; + std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); - if (IsTrueDataDependence && + if (IsTrueDataDependence && EnableForwardingConflictDetection && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; @@ -1219,7 +1383,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides) { - MaxSafeDepDistBytes = -1U; + MaxSafeDepDistBytes = -1; while (!CheckDeps.empty()) { MemAccessInfo CurAccess = *CheckDeps.begin(); @@ -1228,8 +1392,10 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); // Check accesses within this set. - EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE; - AI = AccessSets.member_begin(I), AE = AccessSets.member_end(); + EquivalenceClasses<MemAccessInfo>::member_iterator AI = + AccessSets.member_begin(I); + EquivalenceClasses<MemAccessInfo>::member_iterator AE = + AccessSets.member_end(); // Check every access pair. while (AI != AE) { @@ -1305,10 +1471,11 @@ void MemoryDepChecker::Dependence::print( bool LoopAccessInfo::canAnalyzeLoop() { // We need to have a loop header. - DEBUG(dbgs() << "LAA: Found a loop: " << - TheLoop->getHeader()->getName() << '\n'); + DEBUG(dbgs() << "LAA: Found a loop in " + << TheLoop->getHeader()->getParent()->getName() << ": " + << TheLoop->getHeader()->getName() << '\n'); - // We can only analyze innermost loops. + // We can only analyze innermost loops. if (!TheLoop->empty()) { DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); emitAnalysis(LoopAccessReport() << "loop is not the innermost loop"); @@ -1345,8 +1512,8 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + const SCEV *ExitCount = PSE->getBackedgeTakenCount(); + if (ExitCount == PSE->getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); @@ -1356,41 +1523,37 @@ bool LoopAccessInfo::canAnalyzeLoop() { return true; } -void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { - - typedef SmallVector<Value*, 16> ValueVector; +void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, + const TargetLibraryInfo *TLI, + DominatorTree *DT) { typedef SmallPtrSet<Value*, 16> ValueSet; - // Holds the Load and Store *instructions*. - ValueVector Loads; - ValueVector Stores; + // Holds the Load and Store instructions. + SmallVector<LoadInst *, 16> Loads; + SmallVector<StoreInst *, 16> Stores; // Holds all the different accesses in the loop. unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtChecking.Pointers.clear(); - PtrRtChecking.Need = false; + PtrRtChecking->Pointers.clear(); + PtrRtChecking->Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); // For each block. - for (Loop::block_iterator bb = TheLoop->block_begin(), - be = TheLoop->block_end(); bb != be; ++bb) { - + for (BasicBlock *BB : TheLoop->blocks()) { // Scan the BB and collect legal loads and stores. - for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; - ++it) { - + for (Instruction &I : *BB) { // If this is a load, save it. 
If this instruction can read from memory // but is not a load, then we quit. Notice that we don't handle function // calls that read or write. - if (it->mayReadFromMemory()) { + if (I.mayReadFromMemory()) { // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. - CallInst *Call = dyn_cast<CallInst>(it); - if (Call && getIntrinsicIDForCall(Call, TLI)) + auto *Call = dyn_cast<CallInst>(&I); + if (Call && getVectorIntrinsicIDForCall(Call, TLI)) continue; // If the function has an explicit vectorized counterpart, we can safely @@ -1399,7 +1562,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) continue; - LoadInst *Ld = dyn_cast<LoadInst>(it); + auto *Ld = dyn_cast<LoadInst>(&I); if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { emitAnalysis(LoopAccessReport(Ld) << "read with atomic ordering or volatile read"); @@ -1409,16 +1572,18 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { } NumLoads++; Loads.push_back(Ld); - DepChecker.addAccess(Ld); + DepChecker->addAccess(Ld); + if (EnableMemAccessVersioning) + collectStridedAccess(Ld); continue; } // Save 'store' instructions. Abort if other instructions write to memory. - if (it->mayWriteToMemory()) { - StoreInst *St = dyn_cast<StoreInst>(it); + if (I.mayWriteToMemory()) { + auto *St = dyn_cast<StoreInst>(&I); if (!St) { - emitAnalysis(LoopAccessReport(&*it) << - "instruction cannot be vectorized"); + emitAnalysis(LoopAccessReport(St) + << "instruction cannot be vectorized"); CanVecMem = false; return; } @@ -1431,7 +1596,9 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { } NumStores++; Stores.push_back(St); - DepChecker.addAccess(St); + DepChecker->addAccess(St); + if (EnableMemAccessVersioning) + collectStridedAccess(St); } } // Next instr. } // Next block. @@ -1449,7 +1616,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses, PSE); + AA, LI, DependentAccesses, *PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1458,10 +1625,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // writes and between reads and writes, but not between reads and reads. ValueSet Seen; - ValueVector::iterator I, IE; - for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { - StoreInst *ST = cast<StoreInst>(*I); - Value* Ptr = ST->getPointerOperand(); + for (StoreInst *ST : Stores) { + Value *Ptr = ST->getPointerOperand(); // Check for store to loop invariant address. StoreToLoopInvariantAddress |= isUniform(Ptr); // If we did *not* see this pointer before, insert it to the read-write @@ -1488,9 +1653,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { return; } - for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { - LoadInst *LD = cast<LoadInst>(*I); - Value* Ptr = LD->getPointerOperand(); + for (LoadInst *LD : Loads) { + Value *Ptr = LD->getPointerOperand(); // If we did *not* see this pointer before, insert it to the // read list. If we *did* see it before, then it is already in // the read-write list. 
This allows us to vectorize expressions @@ -1500,7 +1664,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || + !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1529,8 +1694,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); + bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), + TheLoop, SymbolicStrides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1544,22 +1709,22 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); - CanVecMem = DepChecker.areDepsSafe( - DependentAccesses, Accesses.getDependenciesToCheck(), Strides); - MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes(); + CanVecMem = DepChecker->areDepsSafe( + DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); + MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); - if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { + if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); // Clear the dependency checks. We assume they are not needed. - Accesses.resetDepChecks(DepChecker); + Accesses.resetDepChecks(*DepChecker); - PtrRtChecking.reset(); - PtrRtChecking.Need = true; + PtrRtChecking->reset(); + PtrRtChecking->Need = true; - auto *SE = PSE.getSE(); - CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); + auto *SE = PSE->getSE(); + CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop, + SymbolicStrides, true); // Check that we found the bounds for the pointer. if (!CanDoRTIfNeeded) { @@ -1576,11 +1741,15 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (PtrRtChecking.Need ? "" : " don't") + << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); else { - emitAnalysis(LoopAccessReport() << - "unsafe dependent memory operations in loop"); + emitAnalysis( + LoopAccessReport() + << "unsafe dependent memory operations in loop. 
Use " + "#pragma loop distribute(enable) to allow loop distribution " + "to attempt to isolate the offending operations into a separate " + "loop"); DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); } } @@ -1600,7 +1769,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); + return (PSE->getSE()->isLoopInvariant(PSE->getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1681,10 +1850,11 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( Instruction *Loc, const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks) const { - auto *SE = PSE.getSE(); + const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); + auto *SE = PSE->getSE(); SCEVExpander Exp(*SE, DL, "induction"); auto ExpandedChecks = - expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking); LLVMContext &Ctx = Loc->getContext(); Instruction *FirstInst = nullptr; @@ -1711,9 +1881,17 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + // [A|B].Start points to the first accessed byte under base [A|B]. + // [A|B].End points to the last accessed byte, plus one. + // There is no conflict when the intervals are disjoint: + // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) + // + // bound0 = (B.Start < A.End) + // bound1 = (A.Start < B.End) + // IsConflict = bound0 & bound1 + Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); FirstInst = getFirstInst(FirstInst, Cmp1, Loc); Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); @@ -1740,47 +1918,68 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { - if (!PtrRtChecking.Need) + if (!PtrRtChecking->Need) return std::make_pair(nullptr, nullptr); - return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); + return addRuntimeChecks(Loc, PtrRtChecking->getChecks()); +} + +void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { + Value *Ptr = nullptr; + if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess)) + Ptr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess)) + Ptr = SI->getPointerOperand(); + else + return; + + Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); + if (!Stride) + return; + + DEBUG(dbgs() << "LAA: Found a strided access that we can version"); + DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + SymbolicStrides[Ptr] = Stride; + StrideSet.insert(Stride); } LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, - const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - const ValueToValueMap &Strides) - : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), - TLI(TLI), AA(AA), DT(DT), LI(LI), 
NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false), + DominatorTree *DT, LoopInfo *LI) + : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)), + PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)), + DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), + NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) - analyzeLoop(Strides); + analyzeLoop(AA, LI, TLI, DT); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtChecking.Need) - OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; - else - OS.indent(Depth) << "Memory dependences are safe\n"; + OS.indent(Depth) << "Memory dependences are safe"; + if (MaxSafeDepDistBytes != -1ULL) + OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes + << " bytes"; + if (PtrRtChecking->Need) + OS << " with run-time checks"; + OS << "\n"; } if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *Dependences = DepChecker.getDependences()) { + if (auto *Dependences = DepChecker->getDependences()) { OS.indent(Depth) << "Dependences:\n"; for (auto &Dep : *Dependences) { - Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); + Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); OS << "\n"; } } else OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. - PtrRtChecking.print(OS, Depth); + PtrRtChecking->print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Store to invariant address was " @@ -1788,43 +1987,35 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; - PSE.getUnionPredicate().print(OS, Depth); + PSE->getUnionPredicate().print(OS, Depth); + + OS << "\n"; + + OS.indent(Depth) << "Expressions re-written:\n"; + PSE->print(OS, Depth); } -const LoopAccessInfo & -LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { +const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; -#ifndef NDEBUG - assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) && - "Symbolic strides changed for loop"); -#endif - - if (!LAI) { - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = - llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides); -#ifndef NDEBUG - LAI->NumSymbolicStrides = Strides.size(); -#endif - } + if (!LAI) + LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); + return *LAI.get(); } -void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { - LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this); - - ValueToValueMap NoSymbolicStrides; +void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { + LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this); for (Loop *TopLevelLoop : *LI) for (Loop *L : depth_first(TopLevelLoop)) { OS.indent(2) << L->getHeader()->getName() << ":\n"; - auto &LAI = LAA.getInfo(L, NoSymbolicStrides); + auto &LAI = LAA.getInfo(L); LAI.print(OS, 4); } } -bool LoopAccessAnalysis::runOnFunction(Function &F) { +bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? 
&TLIP->getTLI() : nullptr; @@ -1835,7 +2026,7 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { return false; } -void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); @@ -1844,19 +2035,52 @@ void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } -char LoopAccessAnalysis::ID = 0; +char LoopAccessLegacyAnalysis::ID = 0; static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" -INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) +INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) +INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) + +char LoopAccessAnalysis::PassID; + +LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) { + const AnalysisManager<Function> &FAM = + AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); + Function &F = *L.getHeader()->getParent(); + auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F); + auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(F); + auto *AA = FAM.getCachedResult<AAManager>(F); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + auto *LI = FAM.getCachedResult<LoopAnalysis>(F); + if (!SE) + report_fatal_error( + "ScalarEvolution must have been cached at a higher level"); + if (!AA) + report_fatal_error("AliasAnalysis must have been cached at a higher level"); + if (!DT) + report_fatal_error("DominatorTree must have been cached at a higher level"); + if (!LI) + report_fatal_error("LoopInfo must have been cached at a higher level"); + return LoopAccessInfo(&L, SE, TLI, AA, DT, LI); +} + +PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L, + AnalysisManager<Loop> &AM) { + Function &F = *L.getHeader()->getParent(); + auto &LAI = AM.getResult<LoopAccessAnalysis>(L); + OS << "Loop access info in function '" << F.getName() << "':\n"; + OS.indent(2) << L.getHeader()->getName() << ":\n"; + LAI.print(OS, 4); + return PreservedAnalyses::all(); +} namespace llvm { Pass *createLAAPass() { - return new LoopAccessAnalysis(); + return new LoopAccessLegacyAnalysis(); } } |
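For reference, the store-to-load forwarding heuristic that this patch renames from NumCyclesForStoreLoadThroughMemory to NumItersForStoreLoadThroughMemory can be exercised in isolation. The following is a minimal sketch, simplified from the patched couldPreventStoreLoadForward: it drops the interaction with MaxSafeDepDistBytes, hardcodes an assumed maximum vector width of 64 elements, and the values in main() are illustrative only.

#include <cstdint>
#include <cstdio>

// Walk candidate vector widths (in bytes) and reject any VF for which the
// dependence distance is not a multiple of the access width, provided the
// distance is small enough (fewer than 8*TypeByteSize iterations away) for
// the store and load to still collide in the store buffer.
static bool couldPreventStoreLoadForward(uint64_t Distance,
                                         uint64_t TypeByteSize,
                                         uint64_t MaxVectorWidth = 64) {
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t MaxVFWithoutSLForwardIssues = MaxVectorWidth * TypeByteSize;

  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >>= 1);
      break;
    }
  }
  // No vector factor of at least two elements survives: vectorizing would
  // defeat store-to-load forwarding.
  return MaxVFWithoutSLForwardIssues < 2 * TypeByteSize;
}

int main() {
  // A load 4 bytes after a 4-byte store is misaligned with respect to an
  // 8-byte (two element) vector, so forwarding would be at risk.
  std::printf("distance 4:    %d\n", couldPreventStoreLoadForward(4, 4));
  // A distance of 1024 bytes is a multiple of every candidate VF and far
  // enough away not to matter.
  std::printf("distance 1024: %d\n", couldPreventStoreLoadForward(1024, 4));
  return 0;
}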