Diffstat (limited to 'contrib/llvm/lib/Analysis')
64 files changed, 7352 insertions, 3965 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index f931b6f..84da76b 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -53,7 +53,8 @@ using namespace llvm; static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden, cl::init(false)); -AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)) { +AAResults::AAResults(AAResults &&Arg) + : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) { for (auto &AA : AAs) AA->setAAResults(this); } @@ -69,6 +70,22 @@ AAResults::~AAResults() { #endif } +bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Check if the AA manager itself has been invalidated. + auto PAC = PA.getChecker<AAManager>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) + return true; // The manager needs to be blown away, clear everything. + + // Check all of the dependencies registered. + for (AnalysisKey *ID : AADeps) + if (Inv.invalidate(ID, F, PA)) + return true; + + // Everything we depend on is still fine, so are we. Nothing to invalidate. + return false; +} + //===----------------------------------------------------------------------===// // Default chaining methods //===----------------------------------------------------------------------===// @@ -141,7 +158,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, // Try to refine the mod-ref info further using other API entry points to the // aggregate set of AA results. auto MRB = getModRefBehavior(CS); - if (MRB == FMRB_DoesNotAccessMemory) + if (MRB == FMRB_DoesNotAccessMemory || + MRB == FMRB_OnlyAccessesInaccessibleMem) return MRI_NoModRef; if (onlyReadsMemory(MRB)) @@ -149,7 +167,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, else if (doesNotReadMemory(MRB)) Result = ModRefInfo(Result & MRI_Mod); - if (onlyAccessesArgPointees(MRB)) { + if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) { bool DoesAlias = false; ModRefInfo AllArgsMask = MRI_NoModRef; if (doesAccessArgPointees(MRB)) { @@ -459,7 +477,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) + (!CS.doesNotCapture(ArgNo) && + ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -512,7 +531,7 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1, AAResults::Concept::~Concept() {} // Provide a definition for the static object used to identify passes. -char AAManager::PassID; +AnalysisKey AAManager::Key; namespace { /// A wrapper pass for external alias analyses. 
This just squirrels away the diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index baf8f3f..4d6a6c9 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -88,7 +88,7 @@ static inline bool isInterestingPointer(Value *V) { && !isa<ConstantPointerNull>(V); } -PreservedAnalyses AAEvaluator::run(Function &F, AnalysisManager<Function> &AM) { +PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) { runInternal(F, AM.getResult<AAManager>(F)); return PreservedAnalyses::all(); } diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp index f3f13df..2b48794 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp @@ -7,25 +7,23 @@ namespace llvm { namespace cflaa { namespace { -LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0; -LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1; -LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2; -LLVM_CONSTEXPR unsigned AttrCallerIndex = 3; -LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4; -LLVM_CONSTEXPR unsigned AttrLastArgIndex = NumAliasAttrs; -LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; +const unsigned AttrEscapedIndex = 0; +const unsigned AttrUnknownIndex = 1; +const unsigned AttrGlobalIndex = 2; +const unsigned AttrCallerIndex = 3; +const unsigned AttrFirstArgIndex = 4; +const unsigned AttrLastArgIndex = NumAliasAttrs; +const unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; -// NOTE: These aren't AliasAttrs because bitsets don't have a constexpr -// ctor for some versions of MSVC that we support. We could maybe refactor, -// but... +// It would be *slightly* prettier if we changed these to AliasAttrs, but it +// seems that both GCC and MSVC emit dynamic initializers for const bitsets. 
using AliasAttr = unsigned; -LLVM_CONSTEXPR AliasAttr AttrNone = 0; -LLVM_CONSTEXPR AliasAttr AttrEscaped = 1 << AttrEscapedIndex; -LLVM_CONSTEXPR AliasAttr AttrUnknown = 1 << AttrUnknownIndex; -LLVM_CONSTEXPR AliasAttr AttrGlobal = 1 << AttrGlobalIndex; -LLVM_CONSTEXPR AliasAttr AttrCaller = 1 << AttrCallerIndex; -LLVM_CONSTEXPR AliasAttr ExternalAttrMask = - AttrEscaped | AttrUnknown | AttrGlobal; +const AliasAttr AttrNone = 0; +const AliasAttr AttrEscaped = 1 << AttrEscapedIndex; +const AliasAttr AttrUnknown = 1 << AttrUnknownIndex; +const AliasAttr AttrGlobal = 1 << AttrGlobalIndex; +const AliasAttr AttrCaller = 1 << AttrCallerIndex; +const AliasAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal; } AliasAttrs getAttrNone() { return AttrNone; } @@ -91,7 +89,7 @@ instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) { auto To = instantiateInterfaceValue(ERelation.To, CS); if (!To) return None; - return InstantiatedRelation{*From, *To}; + return InstantiatedRelation{*From, *To, ERelation.Offset}; } Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr, diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h index 43c0d4c..51a85f4 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h +++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h @@ -99,7 +99,7 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs); //===----------------------------------------------------------------------===// /// The maximum number of arguments we can put into a summary. -LLVM_CONSTEXPR static unsigned MaxSupportedArgsInSummary = 50; +static const unsigned MaxSupportedArgsInSummary = 50; /// We use InterfaceValue to describe parameters/return value, as well as /// potential memory locations that are pointed to by parameters/return value, @@ -120,13 +120,66 @@ inline bool operator==(InterfaceValue LHS, InterfaceValue RHS) { inline bool operator!=(InterfaceValue LHS, InterfaceValue RHS) { return !(LHS == RHS); } +inline bool operator<(InterfaceValue LHS, InterfaceValue RHS) { + return LHS.Index < RHS.Index || + (LHS.Index == RHS.Index && LHS.DerefLevel < RHS.DerefLevel); +} +inline bool operator>(InterfaceValue LHS, InterfaceValue RHS) { + return RHS < LHS; +} +inline bool operator<=(InterfaceValue LHS, InterfaceValue RHS) { + return !(RHS < LHS); +} +inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) { + return !(LHS < RHS); +} + +// We use UnknownOffset to represent pointer offsets that cannot be determined +// at compile time. Note that MemoryLocation::UnknownSize cannot be used here +// because we require a signed value. +static const int64_t UnknownOffset = INT64_MAX; + +inline int64_t addOffset(int64_t LHS, int64_t RHS) { + if (LHS == UnknownOffset || RHS == UnknownOffset) + return UnknownOffset; + // FIXME: Do we need to guard against integer overflow here? + return LHS + RHS; +} /// We use ExternalRelation to describe an externally visible aliasing relations /// between parameters/return value of a function. 
struct ExternalRelation { InterfaceValue From, To; + int64_t Offset; }; +inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) { + return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset; +} +inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) { + return !(LHS == RHS); +} +inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) { + if (LHS.From < RHS.From) + return true; + if (LHS.From > RHS.From) + return false; + if (LHS.To < RHS.To) + return true; + if (LHS.To > RHS.To) + return false; + return LHS.Offset < RHS.Offset; +} +inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) { + return RHS < LHS; +} +inline bool operator<=(ExternalRelation LHS, ExternalRelation RHS) { + return !(RHS < LHS); +} +inline bool operator>=(ExternalRelation LHS, ExternalRelation RHS) { + return !(LHS < RHS); +} + /// We use ExternalAttribute to describe an externally visible AliasAttrs /// for parameters/return value. struct ExternalAttribute { @@ -174,6 +227,7 @@ inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) { /// callsite struct InstantiatedRelation { InstantiatedValue From, To; + int64_t Offset; }; Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation, CallSite); diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index d349ac5..701b0e1 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -26,12 +26,19 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<unsigned> + SaturationThreshold("alias-set-saturation-threshold", cl::Hidden, + cl::init(250), + cl::desc("The maximum number of pointers may-alias " + "sets may contain before degradation")); + /// mergeSetIn - Merge the specified alias set into this alias set. /// void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { assert(!AS.Forward && "Alias set is already forwarding!"); assert(!Forward && "This set is a forwarding set!!"); + bool WasMustAlias = (Alias == SetMustAlias); // Update the alias and access types of this set... Access |= AS.Access; Alias |= AS.Alias; @@ -52,6 +59,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { Alias = SetMayAlias; } + if (Alias == SetMayAlias) { + if (WasMustAlias) + AST.TotalMayAliasSetSize += size(); + if (AS.Alias == SetMustAlias) + AST.TotalMayAliasSetSize += AS.size(); + } + bool ASHadUnknownInsts = !AS.UnknownInsts.empty(); if (UnknownInsts.empty()) { // Merge call sites... if (ASHadUnknownInsts) { @@ -63,11 +77,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AS.UnknownInsts.clear(); } - AS.Forward = this; // Forward across AS now... - addRef(); // AS is now pointing to us... + AS.Forward = this; // Forward across AS now... + addRef(); // AS is now pointing to us... // Merge the list of constituent pointers... 
if (AS.PtrList) { + SetSize += AS.size(); + AS.SetSize = 0; *PtrListEnd = AS.PtrList; AS.PtrList->setPrevInList(PtrListEnd); PtrListEnd = AS.PtrListEnd; @@ -85,7 +101,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { Fwd->dropRef(*this); AS->Forward = nullptr; } + + if (AS->Alias == AliasSet::SetMayAlias) + TotalMayAliasSetSize -= AS->size(); + AliasSets.erase(AS); + } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -105,10 +126,13 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, AliasResult Result = AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()), MemoryLocation(Entry.getValue(), Size, AAInfo)); - if (Result != MustAlias) + if (Result != MustAlias) { Alias = SetMayAlias; - else // First entry of must alias must have maximum size! + AST.TotalMayAliasSetSize += size(); + } else { + // First entry of must alias must have maximum size! P->updateSizeAndAAInfo(Size, AAInfo); + } assert(Result != NoAlias && "Cannot be part of must set!"); } @@ -116,11 +140,16 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, Entry.updateSizeAndAAInfo(Size, AAInfo); // Add it to the end of the list... + ++SetSize; assert(*PtrListEnd == nullptr && "End of list is not null?"); *PtrListEnd = &Entry; PtrListEnd = Entry.setPrevInList(PtrListEnd); assert(*PtrListEnd == nullptr && "End of list is not null?"); - addRef(); // Entry points to alias set. + // Entry points to alias set. + addRef(); + + if (Alias == SetMayAlias) + AST.TotalMayAliasSetSize++; } void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { @@ -145,6 +174,9 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo, AliasAnalysis &AA) const { + if (AliasAny) + return true; + if (Alias == SetMustAlias) { assert(UnknownInsts.empty() && "Illegal must alias set!"); @@ -177,6 +209,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, bool AliasSet::aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const { + + if (AliasAny) + return true; + if (!Inst->mayReadOrWriteMemory()) return false; @@ -229,17 +265,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr, return FoundSet; } -/// containsPointer - Return true if the specified location is represented by -/// this alias set, false otherwise. This does not modify the AST object or -/// alias sets. -bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size, - const AAMDNodes &AAInfo) const { - for (const AliasSet &AS : *this) - if (!AS.Forward && AS.aliasesPointer(Ptr, Size, AAInfo, AA)) - return true; - return false; -} - bool AliasSetTracker::containsUnknown(const Instruction *Inst) const { for (const AliasSet &AS : *this) if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA)) @@ -261,16 +286,28 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { return FoundSet; } - - - /// getAliasSetForPointer - Return the alias set that the specified pointer /// lives in. AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, - const AAMDNodes &AAInfo, - bool *New) { + const AAMDNodes &AAInfo) { AliasSet::PointerRec &Entry = getEntryFor(Pointer); + if (AliasAnyAS) { + // At this point, the AST is saturated, so we only have one active alias + // set. That means we already know which alias set we want to return, and + // just need to add the pointer to that set to keep the data structure + // consistent. 
+ // This, of course, means that we will never need a merge here. + if (Entry.hasAliasSet()) { + Entry.updateSizeAndAAInfo(Size, AAInfo); + assert(Entry.getAliasSet(*this) == AliasAnyAS && + "Entry in saturated AST must belong to only alias set"); + } else { + AliasAnyAS->addPointer(*this, Entry, Size, AAInfo); + } + return *AliasAnyAS; + } + // Check to see if the pointer is already known. if (Entry.hasAliasSet()) { // If the size changed, we may need to merge several alias sets. @@ -290,68 +327,55 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, return *AS; } - if (New) *New = true; // Otherwise create a new alias set to hold the loaded pointer. AliasSets.push_back(new AliasSet()); AliasSets.back().addPointer(*this, Entry, Size, AAInfo); return AliasSets.back(); } -bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { - bool NewPtr; - addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr); - return NewPtr; +void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { + addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess); } - -bool AliasSetTracker::add(LoadInst *LI) { +void AliasSetTracker::add(LoadInst *LI) { if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); AliasSet::AccessLattice Access = AliasSet::RefAccess; - bool NewPtr; const DataLayout &DL = LI->getModule()->getDataLayout(); AliasSet &AS = addPointer(LI->getOperand(0), - DL.getTypeStoreSize(LI->getType()), - AAInfo, Access, NewPtr); + DL.getTypeStoreSize(LI->getType()), AAInfo, Access); if (LI->isVolatile()) AS.setVolatile(); - return NewPtr; } -bool AliasSetTracker::add(StoreInst *SI) { +void AliasSetTracker::add(StoreInst *SI) { if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI); AAMDNodes AAInfo; SI->getAAMetadata(AAInfo); AliasSet::AccessLattice Access = AliasSet::ModAccess; - bool NewPtr; const DataLayout &DL = SI->getModule()->getDataLayout(); Value *Val = SI->getOperand(0); - AliasSet &AS = addPointer(SI->getOperand(1), - DL.getTypeStoreSize(Val->getType()), - AAInfo, Access, NewPtr); + AliasSet &AS = addPointer( + SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access); if (SI->isVolatile()) AS.setVolatile(); - return NewPtr; } -bool AliasSetTracker::add(VAArgInst *VAAI) { +void AliasSetTracker::add(VAArgInst *VAAI) { AAMDNodes AAInfo; VAAI->getAAMetadata(AAInfo); - bool NewPtr; addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo, - AliasSet::ModRefAccess, NewPtr); - return NewPtr; + AliasSet::ModRefAccess); } -bool AliasSetTracker::add(MemSetInst *MSI) { +void AliasSetTracker::add(MemSetInst *MSI) { AAMDNodes AAInfo; MSI->getAAMetadata(AAInfo); - bool NewPtr; uint64_t Len; if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength())) @@ -360,30 +384,61 @@ bool AliasSetTracker::add(MemSetInst *MSI) { Len = MemoryLocation::UnknownSize; AliasSet &AS = - addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess, NewPtr); + addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess); if (MSI->isVolatile()) AS.setVolatile(); - return NewPtr; } -bool AliasSetTracker::addUnknown(Instruction *Inst) { - if (isa<DbgInfoIntrinsic>(Inst)) - return true; // Ignore DbgInfo Intrinsics. 
+void AliasSetTracker::add(MemTransferInst *MTI) { + AAMDNodes AAInfo; + MTI->getAAMetadata(AAInfo); + + uint64_t Len; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Len = C->getZExtValue(); + else + Len = MemoryLocation::UnknownSize; + + AliasSet &ASSrc = + addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess); + if (MTI->isVolatile()) + ASSrc.setVolatile(); + + AliasSet &ASDst = + addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess); + if (MTI->isVolatile()) + ASDst.setVolatile(); +} + +void AliasSetTracker::addUnknown(Instruction *Inst) { + if (isa<DbgInfoIntrinsic>(Inst)) + return; // Ignore DbgInfo Intrinsics. + + if (auto *II = dyn_cast<IntrinsicInst>(Inst)) { + // These intrinsics will show up as affecting memory, but they are just + // markers. + switch (II->getIntrinsicID()) { + default: + break; + // FIXME: Add lifetime/invariant intrinsics (See: PR30807). + case Intrinsic::assume: + return; + } + } if (!Inst->mayReadOrWriteMemory()) - return true; // doesn't alias anything + return; // doesn't alias anything AliasSet *AS = findAliasSetForUnknownInst(Inst); if (AS) { AS->addUnknownInst(Inst, AA); - return false; + return; } AliasSets.push_back(new AliasSet()); AS = &AliasSets.back(); AS->addUnknownInst(Inst, AA); - return true; } -bool AliasSetTracker::add(Instruction *I) { +void AliasSetTracker::add(Instruction *I) { // Dispatch to one of the other add methods. if (LoadInst *LI = dyn_cast<LoadInst>(I)) return add(LI); @@ -393,8 +448,9 @@ bool AliasSetTracker::add(Instruction *I) { return add(VAAI); if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) return add(MSI); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) + return add(MTI); return addUnknown(I); - // FIXME: add support of memcpy and memmove. } void AliasSetTracker::add(BasicBlock &BB) { @@ -418,134 +474,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { add(AS.UnknownInsts[i]); // Loop over all of the pointers in this alias set. - bool X; for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { - AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), - ASI.getAAInfo(), - (AliasSet::AccessLattice)AS.Access, X); + AliasSet &NewAS = + addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(), + (AliasSet::AccessLattice)AS.Access); if (AS.isVolatile()) NewAS.setVolatile(); } } } -/// remove - Remove the specified (potentially non-empty) alias set from the -/// tracker. -void AliasSetTracker::remove(AliasSet &AS) { - // Drop all call sites. - if (!AS.UnknownInsts.empty()) - AS.dropRef(*this); - AS.UnknownInsts.clear(); - - // Clear the alias set. - unsigned NumRefs = 0; - while (!AS.empty()) { - AliasSet::PointerRec *P = AS.PtrList; - - Value *ValToRemove = P->getValue(); - - // Unlink and delete entry from the list of values. - P->eraseFromList(); - - // Remember how many references need to be dropped. - ++NumRefs; - - // Finally, remove the entry. - PointerMap.erase(ValToRemove); - } - - // Stop using the alias set, removing it. 
- AS.RefCount -= NumRefs; - if (AS.RefCount == 0) - AS.removeFromTracker(*this); -} - -bool -AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { - AliasSet *AS = mergeAliasSetsForPointer(Ptr, Size, AAInfo); - if (!AS) return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::remove(LoadInst *LI) { - const DataLayout &DL = LI->getModule()->getDataLayout(); - uint64_t Size = DL.getTypeStoreSize(LI->getType()); - - AAMDNodes AAInfo; - LI->getAAMetadata(AAInfo); - - AliasSet *AS = mergeAliasSetsForPointer(LI->getOperand(0), Size, AAInfo); - if (!AS) return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::remove(StoreInst *SI) { - const DataLayout &DL = SI->getModule()->getDataLayout(); - uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType()); - - AAMDNodes AAInfo; - SI->getAAMetadata(AAInfo); - - AliasSet *AS = mergeAliasSetsForPointer(SI->getOperand(1), Size, AAInfo); - if (!AS) return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::remove(VAArgInst *VAAI) { - AAMDNodes AAInfo; - VAAI->getAAMetadata(AAInfo); - - AliasSet *AS = mergeAliasSetsForPointer(VAAI->getOperand(0), - MemoryLocation::UnknownSize, AAInfo); - if (!AS) return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::remove(MemSetInst *MSI) { - AAMDNodes AAInfo; - MSI->getAAMetadata(AAInfo); - uint64_t Len; - - if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength())) - Len = C->getZExtValue(); - else - Len = MemoryLocation::UnknownSize; - - AliasSet *AS = mergeAliasSetsForPointer(MSI->getRawDest(), Len, AAInfo); - if (!AS) - return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::removeUnknown(Instruction *I) { - if (!I->mayReadOrWriteMemory()) - return false; // doesn't alias anything - - AliasSet *AS = findAliasSetForUnknownInst(I); - if (!AS) return false; - remove(*AS); - return true; -} - -bool AliasSetTracker::remove(Instruction *I) { - // Dispatch to one of the other remove methods... - if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return remove(LI); - if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return remove(SI); - if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) - return remove(VAAI); - if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) - return remove(MSI); - return removeUnknown(I); - // FIXME: add support of memcpy and memmove. -} - - // deleteValue method - This method is used to remove a pointer value from the // AliasSetTracker entirely. It should be used when an instruction is deleted // from the program to update the AST. If you don't use this, you would have @@ -575,6 +512,11 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { // Unlink and delete from the list of values. PtrValEnt->eraseFromList(); + + if (AS->Alias == AliasSet::SetMayAlias) { + AS->SetSize--; + TotalMayAliasSetSize--; + } // Stop using the alias set. AS->dropRef(*this); @@ -597,15 +539,68 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { AliasSet::PointerRec &Entry = getEntryFor(To); if (Entry.hasAliasSet()) return; // Already in the tracker! - // Add it to the alias set it aliases... + // getEntryFor above may invalidate iterator \c I, so reinitialize it. I = PointerMap.find_as(From); + // Add it to the alias set it aliases... 
AliasSet *AS = I->second->getAliasSet(*this); AS->addPointer(*this, Entry, I->second->getSize(), I->second->getAAInfo(), true); } +AliasSet &AliasSetTracker::mergeAllAliasSets() { + assert(!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold) && + "Full merge should happen once, when the saturation threshold is " + "reached"); + + // Collect all alias sets, so that we can drop references with impunity + // without worrying about iterator invalidation. + std::vector<AliasSet *> ASVector; + ASVector.reserve(SaturationThreshold); + for (iterator I = begin(), E = end(); I != E; I++) + ASVector.push_back(&*I); + + // Copy all instructions and pointers into a new set, and forward all other + // sets to it. + AliasSets.push_back(new AliasSet()); + AliasAnyAS = &AliasSets.back(); + AliasAnyAS->Alias = AliasSet::SetMayAlias; + AliasAnyAS->Access = AliasSet::ModRefAccess; + AliasAnyAS->AliasAny = true; + + for (auto Cur : ASVector) { + + // If Cur was already forwarding, just forward to the new AS instead. + AliasSet *FwdTo = Cur->Forward; + if (FwdTo) { + Cur->Forward = AliasAnyAS; + AliasAnyAS->addRef(); + FwdTo->dropRef(*this); + continue; + } + + // Otherwise, perform the actual merge. + AliasAnyAS->mergeSetIn(*Cur, *this); + } + + return *AliasAnyAS; +} + +AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size, + const AAMDNodes &AAInfo, + AliasSet::AccessLattice E) { + + AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo); + AS.Access |= E; + + if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) { + // The AST is now saturated. From here on, we conservatively consider all + // pointers to alias each-other. + return mergeAllAliasSets(); + } + return AS; +} //===----------------------------------------------------------------------===// // AliasSet/AliasSetTracker Printing Support @@ -700,7 +695,7 @@ namespace { bool runOnFunction(Function &F) override { auto &AAWP = getAnalysis<AAResultsWrapperPass>(); Tracker = new AliasSetTracker(AAWP.getAAResults()); - + errs() << "Alias sets for function '" << F.getName() << "':\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) Tracker->add(&*I); Tracker->print(errs()); diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index c04447c..0e7cf40 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -30,10 +30,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCallGraphPrinterLegacyPassPass(Registry); initializeCallGraphViewerPass(Registry); initializeCostModelAnalysisPass(Registry); - initializeCFGViewerPass(Registry); - initializeCFGPrinterPass(Registry); - initializeCFGOnlyViewerPass(Registry); - initializeCFGOnlyPrinterPass(Registry); + initializeCFGViewerLegacyPassPass(Registry); + initializeCFGPrinterLegacyPassPass(Registry); + initializeCFGOnlyViewerLegacyPassPass(Registry); + initializeCFGOnlyPrinterLegacyPassPass(Registry); initializeCFLAndersAAWrapperPassPass(Registry); initializeCFLSteensAAWrapperPassPass(Registry); initializeDependenceAnalysisWrapperPassPass(Registry); @@ -54,6 +54,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeIVUsersWrapperPassPass(Registry); initializeInstCountPass(Registry); initializeIntervalPartitionPass(Registry); + initializeLazyBranchProbabilityInfoPassPass(Registry); initializeLazyBlockFrequencyInfoPassPass(Registry); initializeLazyValueInfoWrapperPassPass(Registry); initializeLintPass(Registry); @@ -76,6 +77,7 @@ void 
llvm::initializeAnalysis(PassRegistry &Registry) { initializeTargetTransformInfoWrapperPassPass(Registry); initializeTypeBasedAAWrapperPassPass(Registry); initializeScopedNoAliasAAWrapperPassPass(Registry); + initializeLCSSAVerificationPassPass(Registry); } void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp index ca71644..5851594 100644 --- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp +++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp @@ -24,6 +24,116 @@ using namespace llvm; using namespace llvm::PatternMatch; +SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) { + // Try using find_as first to avoid creating extra value handles just for the + // purpose of doing the lookup. + auto AVI = AffectedValues.find_as(V); + if (AVI != AffectedValues.end()) + return AVI->second; + + auto AVIP = AffectedValues.insert({ + AffectedValueCallbackVH(V, this), SmallVector<WeakVH, 1>()}); + return AVIP.first->second; +} + +void AssumptionCache::updateAffectedValues(CallInst *CI) { + // Note: This code must be kept in-sync with the code in + // computeKnownBitsFromAssume in ValueTracking. + + SmallVector<Value *, 16> Affected; + auto AddAffected = [&Affected](Value *V) { + if (isa<Argument>(V)) { + Affected.push_back(V); + } else if (auto *I = dyn_cast<Instruction>(V)) { + Affected.push_back(I); + + if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt) { + auto *Op = I->getOperand(0); + if (isa<Instruction>(Op) || isa<Argument>(Op)) + Affected.push_back(Op); + } + } + }; + + Value *Cond = CI->getArgOperand(0), *A, *B; + AddAffected(Cond); + + CmpInst::Predicate Pred; + if (match(Cond, m_ICmp(Pred, m_Value(A), m_Value(B)))) { + AddAffected(A); + AddAffected(B); + + if (Pred == ICmpInst::ICMP_EQ) { + // For equality comparisons, we handle the case of bit inversion. + auto AddAffectedFromEq = [&AddAffected](Value *V) { + Value *A; + if (match(V, m_Not(m_Value(A)))) { + AddAffected(A); + V = A; + } + + Value *B; + ConstantInt *C; + // (A & B) or (A | B) or (A ^ B). + if (match(V, + m_CombineOr(m_And(m_Value(A), m_Value(B)), + m_CombineOr(m_Or(m_Value(A), m_Value(B)), + m_Xor(m_Value(A), m_Value(B)))))) { + AddAffected(A); + AddAffected(B); + // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant. + } else if (match(V, + m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)), + m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)), + m_AShr(m_Value(A), + m_ConstantInt(C)))))) { + AddAffected(A); + } + }; + + AddAffectedFromEq(A); + AddAffectedFromEq(B); + } + } + + for (auto &AV : Affected) { + auto &AVV = getOrInsertAffectedValues(AV); + if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end()) + AVV.push_back(CI); + } +} + +void AssumptionCache::AffectedValueCallbackVH::deleted() { + auto AVI = AC->AffectedValues.find(getValPtr()); + if (AVI != AC->AffectedValues.end()) + AC->AffectedValues.erase(AVI); + // 'this' now dangles! 
+} + +void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { + auto &NAVV = getOrInsertAffectedValues(NV); + auto AVI = AffectedValues.find(OV); + if (AVI == AffectedValues.end()) + return; + + for (auto &A : AVI->second) + if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) + NAVV.push_back(A); +} + +void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { + if (!isa<Instruction>(NV) && !isa<Argument>(NV)) + return; + + // Any assumptions that affected this value now affect the new value. + + AC->copyAffectedValuesInCache(getValPtr(), NV); + // 'this' now might dangle! If the AffectedValues map was resized to add an + // entry for NV then this object might have been destroyed in favor of some + // copy in the grown map. +} + void AssumptionCache::scanFunction() { assert(!Scanned && "Tried to scan the function twice!"); assert(AssumeHandles.empty() && "Already have assumes when scanning!"); @@ -37,6 +147,10 @@ void AssumptionCache::scanFunction() { // Mark the scan as complete. Scanned = true; + + // Update affected values. + for (auto &A : AssumeHandles) + updateAffectedValues(cast<CallInst>(A)); } void AssumptionCache::registerAssumption(CallInst *CI) { @@ -72,12 +186,14 @@ void AssumptionCache::registerAssumption(CallInst *CI) { "Cache contains multiple copies of a call!"); } #endif + + updateAffectedValues(CI); } -char AssumptionAnalysis::PassID; +AnalysisKey AssumptionAnalysis::Key; PreservedAnalyses AssumptionPrinterPass::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); OS << "Cached assumptions for function: " << F.getName() << "\n"; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 43d5c3c..c8d0579 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -63,6 +63,21 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20; // depth otherwise the algorithm in aliasGEP will assert. static const unsigned MaxLookupSearchDepth = 6; +bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // We don't care if this analysis itself is preserved, it has no state. But + // we need to check that the analyses it depends on have been. Note that we + // may be created without handles to some analyses and in that case don't + // depend on them. + if (Inv.invalidate<AssumptionAnalysis>(F, PA) || + (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) || + (LI && Inv.invalidate<LoopAnalysis>(F, PA))) + return true; + + // Otherwise this analysis result remains valid. + return false; +} + //===----------------------------------------------------------------------===// // Useful predicates //===----------------------------------------------------------------------===// @@ -227,7 +242,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, Offset = 0; return V; } - // FALL THROUGH. 
+ LLVM_FALLTHROUGH; case Instruction::Add: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); @@ -275,7 +290,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); - // zext(zext(%x)) == zext(%x), and similiarly for sext; we'll handle this + // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this // by just incrementing the number of bits we've extended by. unsigned ExtendedBy = NewWidth - SmallWidth; @@ -409,11 +424,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. gep_type_iterator GTI = gep_type_begin(GEPOp); unsigned PointerSize = DL.getPointerSizeInBits(AS); + // Assume all GEP operands are constants until proven otherwise. + bool GepHasConstantOffset = true; for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); - I != E; ++I) { + I != E; ++I, ++GTI) { const Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. - if (StructType *STy = dyn_cast<StructType>(*GTI++)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); if (FieldNo == 0) @@ -429,11 +446,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, if (CIdx->isZero()) continue; Decomposed.OtherOffset += - DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue(); + DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue(); continue; } - uint64_t Scale = DL.getTypeAllocSize(*GTI); + GepHasConstantOffset = false; + + uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType()); unsigned ZExtBits = 0, SExtBits = 0; // If the integer type is smaller than the pointer size, it is implicitly @@ -458,7 +477,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // A[x][x] -> x*16 + x*4 -> x*20 // This also ensures that 'x' only appears in the index list once. for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) { - if (Decomposed.VarIndices[i].V == Index && + if (Decomposed.VarIndices[i].V == Index && Decomposed.VarIndices[i].ZExtBits == ZExtBits && Decomposed.VarIndices[i].SExtBits == SExtBits) { Scale += Decomposed.VarIndices[i].Scale; @@ -479,10 +498,12 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, } // Take care of wrap-arounds - Decomposed.StructOffset = - adjustToPointerSize(Decomposed.StructOffset, PointerSize); - Decomposed.OtherOffset = - adjustToPointerSize(Decomposed.OtherOffset, PointerSize); + if (GepHasConstantOffset) { + Decomposed.StructOffset = + adjustToPointerSize(Decomposed.StructOffset, PointerSize); + Decomposed.OtherOffset = + adjustToPointerSize(Decomposed.OtherOffset, PointerSize); + } // Analyze the base pointer next. 
V = GEPOp->getOperand(0); @@ -603,6 +624,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { if (F->onlyAccessesArgMemory()) Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); + else if (F->onlyAccessesInaccessibleMemory()) + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem); + else if (F->onlyAccessesInaccessibleMemOrArgMem()) + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem); return Min; } @@ -732,7 +757,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - (!CS.doesNotCapture(OperandNo) && !CS.isByValArgument(OperandNo))) + (!CS.doesNotCapture(OperandNo) && + OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -765,6 +791,31 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, return MRI_NoModRef; } + // The semantics of memcpy intrinsics forbid overlap between their respective + // operands, i.e., source and destination of any given memcpy must no-alias. + // If Loc must-aliases either one of these two locations, then it necessarily + // no-aliases the other. + if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) { + AliasResult SrcAA, DestAA; + + if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), + Loc)) == MustAlias) + // Loc is exactly the memcpy source thus disjoint from memcpy dest. + return MRI_Ref; + if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), + Loc)) == MustAlias) + // The converse case. + return MRI_Mod; + + // It's also possible for Loc to alias both src and dest, or neither. + ModRefInfo rv = MRI_NoModRef; + if (SrcAA != NoAlias) + rv = static_cast<ModRefInfo>(rv | MRI_Ref); + if (DestAA != NoAlias) + rv = static_cast<ModRefInfo>(rv | MRI_Mod); + return rv; + } + // While the assume intrinsic is marked as arbitrarily writing so that // proper control dependencies will be maintained, it never aliases any // particular memory location. @@ -781,6 +832,32 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, if (isIntrinsicCall(CS, Intrinsic::experimental_guard)) return MRI_Ref; + // Like assumes, invariant.start intrinsics were also marked as arbitrarily + // writing so that proper control dependencies are maintained but they never + // mod any particular memory location visible to the IR. + // *Unlike* assumes (which are now modeled as NoModRef), invariant.start + // intrinsic is now modeled as reading memory. This prevents hoisting the + // invariant.start intrinsic over stores. Consider: + // *ptr = 40; + // *ptr = 50; + // invariant_start(ptr) + // int val = *ptr; + // print(val); + // + // This cannot be transformed to: + // + // *ptr = 40; + // invariant_start(ptr) + // *ptr = 50; + // int val = *ptr; + // print(val); + // + // The transformation will cause the second store to be ignored (based on + // rules of invariant.start) and print 40, while the first program always + // prints 50. + if (isIntrinsicCall(CS, Intrinsic::invariant_start)) + return MRI_Ref; + // The AAResultBase base class has some smarts, lets use them. 
return AAResultBase::getModRefInfo(CS, Loc); } @@ -1114,13 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return MayAlias; AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, - AAMDNodes(), V2, V2Size, V2AAInfo); + AAMDNodes(), V2, MemoryLocation::UnknownSize, + V2AAInfo, nullptr, UnderlyingV2); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values - // cannot alias per GEP semantics: "A pointer value formed from a - // getelementptr instruction is associated with the addresses associated - // with the first operand of the getelementptr". + // cannot alias per GEP semantics: "Any memory access must be done through + // a pointer value associated with an address range of the memory access, + // otherwise the behavior is undefined.". return R; // If the max search depth is reached the result is undefined @@ -1251,7 +1329,8 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, const AAMDNodes &SIAAInfo, const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo) { + const AAMDNodes &V2AAInfo, + const Value *UnderV2) { // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) @@ -1269,12 +1348,14 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. AliasResult Alias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), + SISize, SIAAInfo, UnderV2); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo, + UnderV2); return MergeAliasResults(ThisAlias, Alias); } @@ -1282,8 +1363,8 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, /// another. AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, const AAMDNodes &PNAAInfo, const Value *V2, - uint64_t V2Size, - const AAMDNodes &V2AAInfo) { + uint64_t V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderV2) { // Track phi nodes we have visited. We use this information when we determine // value equivalence. VisitedPhiBBs.insert(PN->getParent()); @@ -1362,7 +1443,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, PNSize = MemoryLocation::UnknownSize; AliasResult Alias = - aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], + PNSize, PNAAInfo, UnderV2); // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. @@ -1375,7 +1457,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, Value *V = V1Srcs[i]; AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1388,7 +1470,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, /// array references. 
AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, AAMDNodes V1AAInfo, const Value *V2, - uint64_t V2Size, AAMDNodes V2AAInfo) { + uint64_t V2Size, AAMDNodes V2AAInfo, + const Value *O1, const Value *O2) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size == 0 || V2Size == 0) @@ -1416,8 +1499,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. - const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth); - const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth); + if (O1 == nullptr) + O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth); + + if (O2 == nullptr) + O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1500,23 +1586,26 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { std::swap(V1, V2); + std::swap(O1, O2); std::swap(V1Size, V2Size); std::swap(V1AAInfo, V2AAInfo); } if (const PHINode *PN = dyn_cast<PHINode>(V1)) { - AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); + AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, + V2, V2Size, V2AAInfo, O2); if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { std::swap(V1, V2); + std::swap(O1, O2); std::swap(V1Size, V2Size); std::swap(V1AAInfo, V2AAInfo); } if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = - aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); + aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2); if (Result != MayAlias) return AliasCache[Locs] = Result; } @@ -1667,9 +1756,9 @@ bool BasicAAResult::constantOffsetHeuristic( // BasicAliasAnalysis Pass //===----------------------------------------------------------------------===// -char BasicAA::PassID; +AnalysisKey BasicAA::Key; -BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> &AM) { +BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { return BasicAAResult(F.getParent()->getDataLayout(), AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 1dd8f4f..4cdbe4d 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -39,8 +39,7 @@ static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG( "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " - "profile count if available."), - clEnumValEnd)); + "profile count if available."))); cl::opt<std::string> ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, @@ -60,24 +59,22 @@ namespace llvm { template <> struct GraphTraits<BlockFrequencyInfo *> { - typedef const BasicBlock NodeType; + typedef const BasicBlock *NodeRef; typedef succ_const_iterator ChildIteratorType; - typedef Function::const_iterator nodes_iterator; + typedef pointer_iterator<Function::const_iterator> nodes_iterator; - static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) { + static NodeRef getEntryNode(const BlockFrequencyInfo *G) { return &G->getFunction()->front(); 
} - static ChildIteratorType child_begin(const NodeType *N) { + static ChildIteratorType child_begin(const NodeRef N) { return succ_begin(N); } - static ChildIteratorType child_end(const NodeType *N) { - return succ_end(N); - } + static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return nodes_iterator(G->getFunction()->begin()); } static nodes_iterator nodes_end(const BlockFrequencyInfo *G) { - return G->getFunction()->end(); + return nodes_iterator(G->getFunction()->end()); } }; @@ -162,6 +159,13 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const { return BFI->getBlockProfileCount(*getFunction(), BB); } +Optional<uint64_t> +BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { + if (!BFI) + return None; + return BFI->getProfileCountFromFreq(*getFunction(), Freq); +} + void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) { assert(BFI && "Expected analysis to be available"); BFI->setBlockFreq(BB, Freq); @@ -248,9 +252,9 @@ bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) { return false; } -char BlockFrequencyAnalysis::PassID; +AnalysisKey BlockFrequencyAnalysis::Key; BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { BlockFrequencyInfo BFI; BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F), AM.getResult<LoopAnalysis>(F)); @@ -258,7 +262,7 @@ BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F, } PreservedAnalyses -BlockFrequencyPrinterPass::run(Function &F, AnalysisManager<Function> &AM) { +BlockFrequencyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "Printing analysis results of BFI for function " << "'" << F.getName() << "':" << "\n"; diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index c2039e1..9850e02 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -533,12 +533,18 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { Optional<uint64_t> BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F, const BlockNode &Node) const { + return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency()); +} + +Optional<uint64_t> +BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, + uint64_t Freq) const { auto EntryCount = F.getEntryCount(); if (!EntryCount) return None; // Use 128 bit APInt to do the arithmetic to avoid overflow. 
APInt BlockCount(128, EntryCount.getValue()); - APInt BlockFreq(128, getBlockFreq(Node).getFrequency()); + APInt BlockFreq(128, Freq); APInt EntryFreq(128, getEntryFreq()); BlockCount *= BlockFreq; BlockCount = BlockCount.udiv(EntryFreq); @@ -622,15 +628,12 @@ namespace llvm { template <> struct GraphTraits<IrreducibleGraph> { typedef bfi_detail::IrreducibleGraph GraphT; - typedef const GraphT::IrrNode NodeType; typedef const GraphT::IrrNode *NodeRef; typedef GraphT::IrrNode::iterator ChildIteratorType; - static const NodeType *getEntryNode(const GraphT &G) { - return G.StartIrr; - } - static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } - static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } + static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; } + static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index d802552..3eabb78 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -162,12 +162,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { return true; } - BranchProbability UnreachableProb(UR_TAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * - UnreachableEdges.size()); - BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * - ReachableEdges.size()); + auto UnreachableProb = BranchProbability::getBranchProbability( + UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + uint64_t(UnreachableEdges.size())); + auto ReachableProb = BranchProbability::getBranchProbability( + UR_NONTAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size())); for (unsigned SuccIdx : UnreachableEdges) setEdgeProbability(BB, SuccIdx, UnreachableProb); @@ -279,6 +279,16 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { } } + if (auto *II = dyn_cast<InvokeInst>(TI)) { + // If the terminator is an InvokeInst, consider only the normal destination + // block. + if (PostDominatedByColdCall.count(II->getNormalDest())) + PostDominatedByColdCall.insert(BB); + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + return false; + } + // Skip probabilities if this block has a single successor. 
if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) return false; @@ -290,12 +300,12 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { return true; } - BranchProbability ColdProb(CC_TAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * - ColdEdges.size()); - BranchProbability NormalProb(CC_NONTAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * - NormalEdges.size()); + auto ColdProb = BranchProbability::getBranchProbability( + CC_TAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size())); + auto NormalProb = BranchProbability::getBranchProbability( + CC_NONTAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size())); for (unsigned SuccIdx : ColdEdges) setEdgeProbability(BB, SuccIdx, ColdProb); @@ -701,16 +711,16 @@ void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, BPI.print(OS); } -char BranchProbabilityAnalysis::PassID; +AnalysisKey BranchProbabilityAnalysis::Key; BranchProbabilityInfo -BranchProbabilityAnalysis::run(Function &F, AnalysisManager<Function> &AM) { +BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; BPI.calculate(F, AM.getResult<LoopAnalysis>(F)); return BPI; } PreservedAnalyses -BranchProbabilityPrinterPass::run(Function &F, AnalysisManager<Function> &AM) { +BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "Printing analysis results of BPI for function " << "'" << F.getName() << "':" << "\n"; diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index c86f1f5..a85af6c 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -23,10 +23,10 @@ using namespace llvm; namespace { - struct CFGViewer : public FunctionPass { + struct CFGViewerLegacyPass : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGViewer() : FunctionPass(ID) { - initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + CFGViewerLegacyPass() : FunctionPass(ID) { + initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -42,14 +42,21 @@ namespace { }; } -char CFGViewer::ID = 0; -INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true) +char CFGViewerLegacyPass::ID = 0; +INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, true) + +PreservedAnalyses CFGViewerPass::run(Function &F, + FunctionAnalysisManager &AM) { + F.viewCFG(); + return PreservedAnalyses::all(); +} + namespace { - struct CFGOnlyViewer : public FunctionPass { + struct CFGOnlyViewerLegacyPass : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGOnlyViewer() : FunctionPass(ID) { - initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + CFGOnlyViewerLegacyPass() : FunctionPass(ID) { + initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -65,29 +72,39 @@ namespace { }; } -char CFGOnlyViewer::ID = 0; -INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", +char CFGOnlyViewerLegacyPass::ID = 0; +INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only", "View CFG of function (with no function bodies)", false, true) +PreservedAnalyses CFGOnlyViewerPass::run(Function &F, + FunctionAnalysisManager &AM) { + F.viewCFGOnly(); + return PreservedAnalyses::all(); +} + +static void writeCFGToDotFile(Function &F) { + std::string 
Filename = ("cfg." + F.getName() + ".dot").str(); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + if (!EC) + WriteGraph(File, (const Function*)&F); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + namespace { - struct CFGPrinter : public FunctionPass { + struct CFGPrinterLegacyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGPrinter() : FunctionPass(ID) { - initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); + CFGPrinterLegacyPass() : FunctionPass(ID) { + initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { - std::string Filename = ("cfg." + F.getName() + ".dot").str(); - errs() << "Writing '" << Filename << "'..."; - - std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); - - if (!EC) - WriteGraph(File, (const Function*)&F); - else - errs() << " error opening file for writing!"; - errs() << "\n"; + writeCFGToDotFile(F); return false; } @@ -99,29 +116,25 @@ namespace { }; } -char CFGPrinter::ID = 0; -INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", +char CFGPrinterLegacyPass::ID = 0; +INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", false, true) +PreservedAnalyses CFGPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + writeCFGToDotFile(F); + return PreservedAnalyses::all(); +} + namespace { - struct CFGOnlyPrinter : public FunctionPass { + struct CFGOnlyPrinterLegacyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGOnlyPrinter() : FunctionPass(ID) { - initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); + CFGOnlyPrinterLegacyPass() : FunctionPass(ID) { + initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { - std::string Filename = ("cfg." + F.getName() + ".dot").str(); - errs() << "Writing '" << Filename << "'..."; - - std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); - - if (!EC) - WriteGraph(File, (const Function*)&F, true); - else - errs() << " error opening file for writing!"; - errs() << "\n"; + writeCFGToDotFile(F); return false; } void print(raw_ostream &OS, const Module* = nullptr) const override {} @@ -132,11 +145,17 @@ namespace { }; } -char CFGOnlyPrinter::ID = 0; -INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", +char CFGOnlyPrinterLegacyPass::ID = 0; +INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only", "Print CFG of function to 'dot' file (with no function bodies)", false, true) +PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + writeCFGToDotFile(F); + return PreservedAnalyses::all(); +} + /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the /// program, displaying the CFG of the current function. 
This depends on there @@ -155,11 +174,11 @@ void Function::viewCFGOnly() const { ViewGraph(this, "cfg" + getName(), true); } -FunctionPass *llvm::createCFGPrinterPass () { - return new CFGPrinter(); +FunctionPass *llvm::createCFGPrinterLegacyPassPass () { + return new CFGPrinterLegacyPass(); } -FunctionPass *llvm::createCFGOnlyPrinterPass () { - return new CFGOnlyPrinter(); +FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass () { + return new CFGOnlyPrinterLegacyPass(); } diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 7d5bd94..e48ff23 100644 --- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -27,12 +27,23 @@ // codes: all we do here is to selectively expand the transitive closure by // discarding edges that are not recognized by the state machine. // -// There is one difference between our current implementation and the one -// described in the paper: out algorithm eagerly computes all alias pairs after -// the CFLGraph is built, while in the paper the authors did the computation in -// a demand-driven fashion. We did not implement the demand-driven algorithm due -// to the additional coding complexity and higher memory profile, but if we -// found it necessary we may switch to it eventually. +// There are two differences between our current implementation and the one +// described in the paper: +// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built, +// while in the paper the authors did the computation in a demand-driven +// fashion. We did not implement the demand-driven algorithm due to the +// additional coding complexity and higher memory profile, but if we found it +// necessary we may switch to it eventually. +// - In the paper the authors use a state machine that does not distinguish +// value reads from value writes. For example, if Y is reachable from X at state +// S3, it may be the case that X is written into Y, or it may be the case that +// there's a third value Z that writes into both X and Y. To make that +// distinction (which is crucial in building function summary as well as +// retrieving mod-ref info), we choose to duplicate some of the states in the +// paper's proposed state machine. The duplication does not change the set the +// machine accepts. Given a pair of reachable values, it only provides more +// detailed information on which value is being written into and which is being +// read from. // //===----------------------------------------------------------------------===// @@ -71,16 +82,65 @@ static const Function *parentFunctionOfValue(const Value *Val) { namespace { enum class MatchState : uint8_t { - FlowFrom = 0, // S1 in the paper - FlowFromMemAlias, // S2 in the paper - FlowTo, // S3 in the paper - FlowToMemAlias // S4 in the paper + // The following state represents S1 in the paper. + FlowFromReadOnly = 0, + // The following two states together represent S2 in the paper. + // The 'NoReadWrite' suffix indicates that there exists an alias path that + // does not contain assignment and reverse assignment edges. + // The 'ReadOnly' suffix indicates that there exists an alias path that + // contains reverse assignment edges only. + FlowFromMemAliasNoReadWrite, + FlowFromMemAliasReadOnly, + // The following two states together represent S3 in the paper. + // The 'WriteOnly' suffix indicates that there exists an alias path that + // contains assignment edges only. 
+ // The 'ReadWrite' suffix indicates that there exists an alias path that + // contains both assignment and reverse assignment edges. Note that if X and Y + // are reachable at 'ReadWrite' state, it does NOT mean X is both read from + // and written to Y. Instead, it means that a third value Z is written to both + // X and Y. + FlowToWriteOnly, + FlowToReadWrite, + // The following two states together represent S4 in the paper. + FlowToMemAliasWriteOnly, + FlowToMemAliasReadWrite, }; +typedef std::bitset<7> StateSet; +const unsigned ReadOnlyStateMask = + (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) | + (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly)); +const unsigned WriteOnlyStateMask = + (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) | + (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly)); + +// A pair that consists of a value and an offset +struct OffsetValue { + const Value *Val; + int64_t Offset; +}; + +bool operator==(OffsetValue LHS, OffsetValue RHS) { + return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset; +} +bool operator<(OffsetValue LHS, OffsetValue RHS) { + return std::less<const Value *>()(LHS.Val, RHS.Val) || + (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset); +} + +// A pair that consists of an InstantiatedValue and an offset +struct OffsetInstantiatedValue { + InstantiatedValue IVal; + int64_t Offset; +}; + +bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) { + return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset; +} + // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in // the paper) during the analysis. class ReachabilitySet { - typedef std::bitset<4> StateSet; typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap; typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap; ValueReachMap ReachMap; @@ -91,6 +151,7 @@ public: // Insert edge 'From->To' at state 'State' bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { + assert(From != To); auto &States = ReachMap[To][From]; auto Idx = static_cast<size_t>(State); if (!States.test(Idx)) { @@ -150,8 +211,6 @@ public: typedef MapType::const_iterator const_iterator; bool add(InstantiatedValue V, AliasAttrs Attr) { - if (Attr.none()) - return false; auto &OldAttr = AttrMap[V]; auto NewAttr = OldAttr | Attr; if (OldAttr == NewAttr) @@ -178,6 +237,57 @@ struct WorkListItem { InstantiatedValue To; MatchState State; }; + +struct ValueSummary { + struct Record { + InterfaceValue IValue; + unsigned DerefLevel; + }; + SmallVector<Record, 4> FromRecords, ToRecords; +}; +} + +namespace llvm { +// Specialize DenseMapInfo for OffsetValue. +template <> struct DenseMapInfo<OffsetValue> { + static OffsetValue getEmptyKey() { + return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + static OffsetValue getTombstoneKey() { + return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + static unsigned getHashValue(const OffsetValue &OVal) { + return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue( + std::make_pair(OVal.Val, OVal.Offset)); + } + static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { + return LHS == RHS; + } +}; + +// Specialize DenseMapInfo for OffsetInstantiatedValue. 
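An aside on the encoding used in the hunk above: each (From, To) value pair carries a std::bitset<7> of MatchStates, and the "is any read-only / write-only state set" questions are answered by intersecting with the two masks. Below is a minimal, self-contained sketch of that idiom; the enum, typedef, and masks mirror the patch, while main() and the sample state set are made up for illustration.

// Minimal sketch of the bitset/mask membership test used by the patch.
// MatchState values mirror the hunk above; the sample data is invented.
#include <bitset>
#include <cstdint>
#include <iostream>

enum class MatchState : uint8_t {
  FlowFromReadOnly = 0,
  FlowFromMemAliasNoReadWrite,
  FlowFromMemAliasReadOnly,
  FlowToWriteOnly,
  FlowToReadWrite,
  FlowToMemAliasWriteOnly,
  FlowToMemAliasReadWrite,
};

typedef std::bitset<7> StateSet;

const unsigned ReadOnlyStateMask =
    (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
    (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
const unsigned WriteOnlyStateMask =
    (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
    (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));

static bool hasReadOnlyState(StateSet Set) {
  return (Set & StateSet(ReadOnlyStateMask)).any();
}
static bool hasWriteOnlyState(StateSet Set) {
  return (Set & StateSet(WriteOnlyStateMask)).any();
}

int main() {
  StateSet S;
  S.set(static_cast<uint8_t>(MatchState::FlowToWriteOnly));
  // Prints "0 1": no read-only state is set, one write-only state is.
  std::cout << hasReadOnlyState(S) << ' ' << hasWriteOnlyState(S) << '\n';
  return 0;
}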
+template <> struct DenseMapInfo<OffsetInstantiatedValue> { + static OffsetInstantiatedValue getEmptyKey() { + return OffsetInstantiatedValue{ + DenseMapInfo<InstantiatedValue>::getEmptyKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + static OffsetInstantiatedValue getTombstoneKey() { + return OffsetInstantiatedValue{ + DenseMapInfo<InstantiatedValue>::getTombstoneKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { + return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue( + std::make_pair(OVal.IVal, OVal.Offset)); + } + static bool isEqual(const OffsetInstantiatedValue &LHS, + const OffsetInstantiatedValue &RHS) { + return LHS == RHS; + } +}; } class CFLAndersAAResult::FunctionInfo { @@ -185,7 +295,7 @@ class CFLAndersAAResult::FunctionInfo { /// Since the alias relation is symmetric, to save some space we assume values /// are properly ordered: if a and b alias each other, and a < b, then b is in /// AliasMap[a] but not vice versa. - DenseMap<const Value *, std::vector<const Value *>> AliasMap; + DenseMap<const Value *, std::vector<OffsetValue>> AliasMap; /// Map a value to its corresponding AliasAttrs DenseMap<const Value *, AliasAttrs> AttrMap; @@ -193,27 +303,56 @@ class CFLAndersAAResult::FunctionInfo { /// Summary of externally visible effects. AliasSummary Summary; - AliasAttrs getAttrs(const Value *) const; + Optional<AliasAttrs> getAttrs(const Value *) const; public: - FunctionInfo(const ReachabilitySet &, AliasAttrMap); + FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, + const ReachabilitySet &, AliasAttrMap); - bool mayAlias(const Value *LHS, const Value *RHS) const; + bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const; const AliasSummary &getAliasSummary() const { return Summary; } }; -CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet, - AliasAttrMap AMap) { - // Populate AttrMap +static bool hasReadOnlyState(StateSet Set) { + return (Set & StateSet(ReadOnlyStateMask)).any(); +} + +static bool hasWriteOnlyState(StateSet Set) { + return (Set & StateSet(WriteOnlyStateMask)).any(); +} + +static Optional<InterfaceValue> +getInterfaceValue(InstantiatedValue IValue, + const SmallVectorImpl<Value *> &RetVals) { + auto Val = IValue.Val; + + Optional<unsigned> Index; + if (auto Arg = dyn_cast<Argument>(Val)) + Index = Arg->getArgNo() + 1; + else if (is_contained(RetVals, Val)) + Index = 0; + + if (Index) + return InterfaceValue{*Index, IValue.DerefLevel}; + return None; +} + +static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap, + const AliasAttrMap &AMap) { for (const auto &Mapping : AMap.mappings()) { auto IVal = Mapping.first; + // Insert IVal into the map + auto &Attr = AttrMap[IVal.Val]; // AttrMap only cares about top-level values if (IVal.DerefLevel == 0) - AttrMap[IVal.Val] = Mapping.second; + Attr |= Mapping.second; } +} - // Populate AliasMap +static void +populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap, + const ReachabilitySet &ReachSet) { for (const auto &OuterMapping : ReachSet.value_mappings()) { // AliasMap only cares about top-level values if (OuterMapping.first.DerefLevel > 0) @@ -224,48 +363,202 @@ CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet, for (const auto &InnerMapping : OuterMapping.second) { // Again, AliasMap only cares about top-level values if (InnerMapping.first.DerefLevel == 0) - AliasList.push_back(InnerMapping.first.Val); 
+ AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset}); } // Sort AliasList for faster lookup - std::sort(AliasList.begin(), AliasList.end(), std::less<const Value *>()); + std::sort(AliasList.begin(), AliasList.end()); } +} - // TODO: Populate function summary here +static void populateExternalRelations( + SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn, + const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) { + // If a function only returns one of its argument X, then X will be both an + // argument and a return value at the same time. This is an edge case that + // needs special handling here. + for (const auto &Arg : Fn.args()) { + if (is_contained(RetVals, &Arg)) { + auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0}; + auto RetVal = InterfaceValue{0, 0}; + ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0}); + } + } + + // Below is the core summary construction logic. + // A naive solution of adding only the value aliases that are parameters or + // return values in ReachSet to the summary won't work: It is possible that a + // parameter P is written into an intermediate value I, and the function + // subsequently returns *I. In that case, *I is does not value alias anything + // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to + // (I, 1). + // To account for the aforementioned case, we need to check each non-parameter + // and non-return value for the possibility of acting as an intermediate. + // 'ValueMap' here records, for each value, which InterfaceValues read from or + // write into it. If both the read list and the write list of a given value + // are non-empty, we know that a particular value is an intermidate and we + // need to add summary edges from the writes to the reads. + DenseMap<Value *, ValueSummary> ValueMap; + for (const auto &OuterMapping : ReachSet.value_mappings()) { + if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) { + for (const auto &InnerMapping : OuterMapping.second) { + // If Src is a param/return value, we get a same-level assignment. 
+ if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) { + // This may happen if both Dst and Src are return values + if (*Dst == *Src) + continue; + + if (hasReadOnlyState(InnerMapping.second)) + ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset}); + // No need to check for WriteOnly state, since ReachSet is symmetric + } else { + // If Src is not a param/return, add it to ValueMap + auto SrcIVal = InnerMapping.first; + if (hasReadOnlyState(InnerMapping.second)) + ValueMap[SrcIVal.Val].FromRecords.push_back( + ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); + if (hasWriteOnlyState(InnerMapping.second)) + ValueMap[SrcIVal.Val].ToRecords.push_back( + ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); + } + } + } + } + + for (const auto &Mapping : ValueMap) { + for (const auto &FromRecord : Mapping.second.FromRecords) { + for (const auto &ToRecord : Mapping.second.ToRecords) { + auto ToLevel = ToRecord.DerefLevel; + auto FromLevel = FromRecord.DerefLevel; + // Same-level assignments should have already been processed by now + if (ToLevel == FromLevel) + continue; + + auto SrcIndex = FromRecord.IValue.Index; + auto SrcLevel = FromRecord.IValue.DerefLevel; + auto DstIndex = ToRecord.IValue.Index; + auto DstLevel = ToRecord.IValue.DerefLevel; + if (ToLevel > FromLevel) + SrcLevel += ToLevel - FromLevel; + else + DstLevel += FromLevel - ToLevel; + + ExtRelations.push_back(ExternalRelation{ + InterfaceValue{SrcIndex, SrcLevel}, + InterfaceValue{DstIndex, DstLevel}, UnknownOffset}); + } + } + } + + // Remove duplicates in ExtRelations + std::sort(ExtRelations.begin(), ExtRelations.end()); + ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), + ExtRelations.end()); +} + +static void populateExternalAttributes( + SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn, + const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) { + for (const auto &Mapping : AMap.mappings()) { + if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) { + auto Attr = getExternallyVisibleAttrs(Mapping.second); + if (Attr.any()) + ExtAttributes.push_back(ExternalAttribute{*IVal, Attr}); + } + } +} + +CFLAndersAAResult::FunctionInfo::FunctionInfo( + const Function &Fn, const SmallVectorImpl<Value *> &RetVals, + const ReachabilitySet &ReachSet, AliasAttrMap AMap) { + populateAttrMap(AttrMap, AMap); + populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); + populateAliasMap(AliasMap, ReachSet); + populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet); } -AliasAttrs CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { +Optional<AliasAttrs> +CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { assert(V != nullptr); - AliasAttrs Attr; auto Itr = AttrMap.find(V); if (Itr != AttrMap.end()) - Attr = Itr->second; - return Attr; + return Itr->second; + return None; } bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS, - const Value *RHS) const { + uint64_t LHSSize, + const Value *RHS, + uint64_t RHSSize) const { assert(LHS && RHS); + // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created + // after the analysis gets executed, and we want to be conservative in those + // cases. 
+ auto MaybeAttrsA = getAttrs(LHS); + auto MaybeAttrsB = getAttrs(RHS); + if (!MaybeAttrsA || !MaybeAttrsB) + return true; + + // Check AliasAttrs before AliasMap lookup since it's cheaper + auto AttrsA = *MaybeAttrsA; + auto AttrsB = *MaybeAttrsB; + if (hasUnknownOrCallerAttr(AttrsA)) + return AttrsB.any(); + if (hasUnknownOrCallerAttr(AttrsB)) + return AttrsA.any(); + if (isGlobalOrArgAttr(AttrsA)) + return isGlobalOrArgAttr(AttrsB); + if (isGlobalOrArgAttr(AttrsB)) + return isGlobalOrArgAttr(AttrsA); + + // At this point both LHS and RHS should point to locally allocated objects + auto Itr = AliasMap.find(LHS); if (Itr != AliasMap.end()) { - if (std::binary_search(Itr->second.begin(), Itr->second.end(), RHS, - std::less<const Value *>())) - return true; - } - // Even if LHS and RHS are not reachable, they may still alias due to their - // AliasAttrs - auto AttrsA = getAttrs(LHS); - auto AttrsB = getAttrs(RHS); + // Find out all (X, Offset) where X == RHS + auto Comparator = [](OffsetValue LHS, OffsetValue RHS) { + return std::less<const Value *>()(LHS.Val, RHS.Val); + }; +#ifdef EXPENSIVE_CHECKS + assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator)); +#endif + auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), + OffsetValue{RHS, 0}, Comparator); + + if (RangePair.first != RangePair.second) { + // Be conservative about UnknownSize + if (LHSSize == MemoryLocation::UnknownSize || + RHSSize == MemoryLocation::UnknownSize) + return true; + + for (const auto &OVal : make_range(RangePair)) { + // Be conservative about UnknownOffset + if (OVal.Offset == UnknownOffset) + return true; + + // We know that LHS aliases (RHS + OVal.Offset) if the control flow + // reaches here. The may-alias query essentially becomes integer + // range-overlap queries over two ranges [OVal.Offset, OVal.Offset + + // LHSSize) and [0, RHSSize). + + // Try to be conservative on super large offsets + if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX)) + return true; + + auto LHSStart = OVal.Offset; + // FIXME: Do we need to guard against integer overflow? 
+ auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize); + auto RHSStart = 0; + auto RHSEnd = static_cast<int64_t>(RHSSize); + if (LHSEnd > RHSStart && LHSStart < RHSEnd) + return true; + } + } + } - if (AttrsA.none() || AttrsB.none()) - return false; - if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) - return true; - if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) - return true; return false; } @@ -292,8 +585,10 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList, // If there's an assignment edge from X to Y, it means Y is reachable from // X at S2 and X is reachable from Y at S1 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { - propagate(Edge.Other, Src, MatchState::FlowFrom, ReachSet, WorkList); - propagate(Src, Edge.Other, MatchState::FlowTo, ReachSet, WorkList); + propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, + WorkList); + propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, + WorkList); } } } @@ -328,16 +623,21 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, auto ToNodeBelow = getNodeBelow(Graph, ToNode); if (FromNodeBelow && ToNodeBelow && MemSet.insert(*FromNodeBelow, *ToNodeBelow)) { - propagate(*FromNodeBelow, *ToNodeBelow, MatchState::FlowFromMemAlias, - ReachSet, WorkList); + propagate(*FromNodeBelow, *ToNodeBelow, + MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList); for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) { auto Src = Mapping.first; - if (Mapping.second.test(static_cast<size_t>(MatchState::FlowFrom))) - propagate(Src, *ToNodeBelow, MatchState::FlowFromMemAlias, ReachSet, - WorkList); - if (Mapping.second.test(static_cast<size_t>(MatchState::FlowTo))) - propagate(Src, *ToNodeBelow, MatchState::FlowToMemAlias, ReachSet, - WorkList); + auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) { + if (Mapping.second.test(static_cast<size_t>(FromState))) + propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList); + }; + + MemAliasPropagate(MatchState::FlowFromReadOnly, + MatchState::FlowFromMemAliasReadOnly); + MemAliasPropagate(MatchState::FlowToWriteOnly, + MatchState::FlowToMemAliasWriteOnly); + MemAliasPropagate(MatchState::FlowToReadWrite, + MatchState::FlowToMemAliasReadWrite); } } @@ -349,45 +649,54 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, // - If *X and *Y are memory aliases, then X and Y are value aliases // - If Y is an alias of X, then reverse assignment edges (if there is any) // should precede any assignment edges on the path from X to Y. 
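The sized may-alias check in the hunk above boils down to a half-open interval overlap test between [OVal.Offset, OVal.Offset + LHSSize) and [0, RHSSize). A stand-alone sketch of that predicate follows; the helper name and the sample sizes are ours, not part of the patch.

// Half-open interval overlap, as used by the sized may-alias query above.
// The function name and the sample values are illustrative only.
#include <cassert>
#include <cstdint>

static bool rangesOverlap(int64_t LHSStart, int64_t LHSSize,
                          int64_t RHSStart, int64_t RHSSize) {
  int64_t LHSEnd = LHSStart + LHSSize; // access covers [LHSStart, LHSEnd)
  int64_t RHSEnd = RHSStart + RHSSize; // access covers [RHSStart, RHSEnd)
  return LHSEnd > RHSStart && LHSStart < RHSEnd;
}

int main() {
  // LHS aliases RHS + 4 and both accesses are 4 bytes wide:
  // [4, 8) vs [0, 4) do not overlap, so the pair may be reported NoAlias.
  assert(!rangesOverlap(/*LHSStart=*/4, /*LHSSize=*/4,
                        /*RHSStart=*/0, /*RHSSize=*/4));
  // An 8-byte access at offset 4 against a 16-byte access at 0 does overlap.
  assert(rangesOverlap(4, 8, 0, 16));
  return 0;
}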
- switch (Item.State) { - case MatchState::FlowFrom: { - for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) - propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet, - WorkList); + auto NextAssignState = [&](MatchState State) { for (const auto &AssignEdge : NodeInfo->Edges) - propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet, - WorkList); + propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList); + }; + auto NextRevAssignState = [&](MatchState State) { + for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) + propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList); + }; + auto NextMemState = [&](MatchState State) { if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { for (const auto &MemAlias : *AliasSet) - propagate(FromNode, MemAlias, MatchState::FlowFromMemAlias, ReachSet, - WorkList); + propagate(FromNode, MemAlias, State, ReachSet, WorkList); } + }; + + switch (Item.State) { + case MatchState::FlowFromReadOnly: { + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToReadWrite); + NextMemState(MatchState::FlowFromMemAliasReadOnly); break; } - case MatchState::FlowFromMemAlias: { - for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) - propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet, - WorkList); - for (const auto &AssignEdge : NodeInfo->Edges) - propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet, - WorkList); + case MatchState::FlowFromMemAliasNoReadWrite: { + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToWriteOnly); break; } - case MatchState::FlowTo: { - for (const auto &AssignEdge : NodeInfo->Edges) - propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet, - WorkList); - if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { - for (const auto &MemAlias : *AliasSet) - propagate(FromNode, MemAlias, MatchState::FlowToMemAlias, ReachSet, - WorkList); - } + case MatchState::FlowFromMemAliasReadOnly: { + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToReadWrite); break; } - case MatchState::FlowToMemAlias: { - for (const auto &AssignEdge : NodeInfo->Edges) - propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet, - WorkList); + case MatchState::FlowToWriteOnly: { + NextAssignState(MatchState::FlowToWriteOnly); + NextMemState(MatchState::FlowToMemAliasWriteOnly); + break; + } + case MatchState::FlowToReadWrite: { + NextAssignState(MatchState::FlowToReadWrite); + NextMemState(MatchState::FlowToMemAliasReadWrite); + break; + } + case MatchState::FlowToMemAliasWriteOnly: { + NextAssignState(MatchState::FlowToWriteOnly); + break; + } + case MatchState::FlowToMemAliasReadWrite: { + NextAssignState(MatchState::FlowToReadWrite); break; } } @@ -465,7 +774,8 @@ CFLAndersAAResult::buildInfoFrom(const Function &Fn) { // to it auto IValueAttrMap = buildAttrMap(Graph, ReachSet); - return FunctionInfo(ReachSet, std::move(IValueAttrMap)); + return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet, + std::move(IValueAttrMap)); } void CFLAndersAAResult::scan(const Function &Fn) { @@ -530,7 +840,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, auto &FunInfo = ensureCached(*Fn); // AliasMap lookup - if (FunInfo->mayAlias(ValA, ValB)) + if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size)) return MayAlias; return NoAlias; } @@ -555,9 +865,9 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, return QueryResult; } -char 
CFLAndersAA::PassID; +AnalysisKey CFLAndersAA::Key; -CFLAndersAAResult CFLAndersAA::run(Function &F, AnalysisManager<Function> &AM) { +CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F)); } diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h index bc6e794..e526e0e 100644 --- a/contrib/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm/lib/Analysis/CFLGraph.h @@ -40,6 +40,7 @@ public: struct Edge { Node Other; + int64_t Offset; }; typedef std::vector<Edge> EdgeList; @@ -107,8 +108,8 @@ public: auto *ToInfo = getNode(To); assert(ToInfo != nullptr); - FromInfo->Edges.push_back(Edge{To}); - ToInfo->ReverseEdges.push_back(Edge{From}); + FromInfo->Edges.push_back(Edge{To, Offset}); + ToInfo->ReverseEdges.push_back(Edge{From, Offset}); } const NodeInfo *getNode(Node N) const { @@ -151,6 +152,7 @@ template <typename CFLAA> class CFLGraphBuilder { /// Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { CFLAA &AA; + const DataLayout &DL; const TargetLibraryInfo &TLI; CFLGraph &Graph; @@ -225,8 +227,8 @@ template <typename CFLAA> class CFLGraphBuilder { void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); } public: - GetEdgesVisitor(CFLGraphBuilder &Builder) - : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph), + GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL) + : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph), ReturnValues(Builder.ReturnedValues) {} void visitInstruction(Instruction &) { @@ -281,9 +283,20 @@ template <typename CFLAA> class CFLGraphBuilder { addAssignEdge(Val, &Inst); } + void visitGEP(GEPOperator &GEPOp) { + uint64_t Offset = UnknownOffset; + APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()), + 0); + if (GEPOp.accumulateConstantOffset(DL, APOffset)) + Offset = APOffset.getSExtValue(); + + auto *Op = GEPOp.getPointerOperand(); + addAssignEdge(Op, &GEPOp, Offset); + } + void visitGetElementPtrInst(GetElementPtrInst &Inst) { - auto *Op = Inst.getPointerOperand(); - addAssignEdge(Op, &Inst); + auto *GEPOp = cast<GEPOperator>(&Inst); + visitGEP(*GEPOp); } void visitSelectInst(SelectInst &Inst) { @@ -321,7 +334,8 @@ template <typename CFLAA> class CFLGraphBuilder { // For now, we'll handle this like a landingpad instruction (by placing // the // result in its own group, and having that group alias externals). - addNode(&Inst, getAttrUnknown()); + if (Inst.getType()->isPointerTy()) + addNode(&Inst, getAttrUnknown()); } static bool isFunctionExternal(Function *Fn) { @@ -444,7 +458,8 @@ template <typename CFLAA> class CFLGraphBuilder { // Exceptions come from "nowhere", from our analysis' perspective. 
// So we place the instruction its own group, noting that said group may // alias externals - addNode(&Inst, getAttrUnknown()); + if (Inst.getType()->isPointerTy()) + addNode(&Inst, getAttrUnknown()); } void visitInsertValueInst(InsertValueInst &Inst) { @@ -468,14 +483,97 @@ template <typename CFLAA> class CFLGraphBuilder { void visitConstantExpr(ConstantExpr *CE) { switch (CE->getOpcode()) { + case Instruction::GetElementPtr: { + auto GEPOp = cast<GEPOperator>(CE); + visitGEP(*GEPOp); + break; + } + case Instruction::PtrToInt: { + auto *Ptr = CE->getOperand(0); + addNode(Ptr, getAttrEscaped()); + break; + } + case Instruction::IntToPtr: { + addNode(CE, getAttrUnknown()); + break; + } + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPExt: + case Instruction::FPTrunc: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: { + auto *Src = CE->getOperand(0); + addAssignEdge(Src, CE); + break; + } + case Instruction::Select: { + auto *TrueVal = CE->getOperand(0); + auto *FalseVal = CE->getOperand(1); + addAssignEdge(TrueVal, CE); + addAssignEdge(FalseVal, CE); + break; + } + case Instruction::InsertElement: { + auto *Vec = CE->getOperand(0); + auto *Val = CE->getOperand(1); + addAssignEdge(Vec, CE); + addStoreEdge(Val, CE); + break; + } + case Instruction::ExtractElement: { + auto *Ptr = CE->getOperand(0); + addLoadEdge(Ptr, CE); + break; + } + case Instruction::InsertValue: { + auto *Agg = CE->getOperand(0); + auto *Val = CE->getOperand(1); + addAssignEdge(Agg, CE); + addStoreEdge(Val, CE); + break; + } + case Instruction::ExtractValue: { + auto *Ptr = CE->getOperand(0); + addLoadEdge(Ptr, CE); + } + case Instruction::ShuffleVector: { + auto *From1 = CE->getOperand(0); + auto *From2 = CE->getOperand(1); + addAssignEdge(From1, CE); + addAssignEdge(From2, CE); + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::FCmp: { + addAssignEdge(CE->getOperand(0), CE); + addAssignEdge(CE->getOperand(1), CE); + break; + } default: llvm_unreachable("Unknown instruction type encountered!"); -// Build the switch statement using the Instruction.def file. 
-#define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: \ - this->visit##OPCODE(*(CLASS *)CE); \ - break; -#include "llvm/IR/Instruction.def" } } }; @@ -517,7 +615,7 @@ template <typename CFLAA> class CFLGraphBuilder { // Builds the graph needed for constructing the StratifiedSets for the given // function void buildGraphFrom(Function &Fn) { - GetEdgesVisitor Visitor(*this); + GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout()); for (auto &Bb : Fn.getBasicBlockList()) for (auto &Inst : Bb.getInstList()) diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index d816822..dde24ef 100644 --- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -153,7 +153,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( if (Itr != InterfaceMap.end()) { if (CurrValue != Itr->second) Summary.RetParamRelations.push_back( - ExternalRelation{CurrValue, Itr->second}); + ExternalRelation{CurrValue, Itr->second, UnknownOffset}); break; } @@ -341,81 +341,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, return NoAlias; } -ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) { - if (auto CalledFunc = CS.getCalledFunction()) { - auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc)); - if (!MaybeInfo.hasValue()) - return MRI_ModRef; - auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes; - auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations; - - bool ArgAttributeIsWritten = - std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(), - [ArgIdx](const ExternalAttribute &ExtAttr) { - return ExtAttr.IValue.Index == ArgIdx + 1; - }); - bool ArgIsAccessed = - std::any_of(RetParamRelations.begin(), RetParamRelations.end(), - [ArgIdx](const ExternalRelation &ExtRelation) { - return ExtRelation.To.Index == ArgIdx + 1 || - ExtRelation.From.Index == ArgIdx + 1; - }); - - return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef - : MRI_ModRef; - } - - return MRI_ModRef; -} - -FunctionModRefBehavior -CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) { - // If we know the callee, try analyzing it - if (auto CalledFunc = CS.getCalledFunction()) - return getModRefBehavior(CalledFunc); - - // Otherwise, be conservative - return FMRB_UnknownModRefBehavior; -} - -FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) { - assert(F != nullptr); - - // TODO: Remove the const_cast - auto &MaybeInfo = ensureCached(const_cast<Function *>(F)); - if (!MaybeInfo.hasValue()) - return FMRB_UnknownModRefBehavior; - auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes; - auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations; - - // First, if any argument is marked Escpaed, Unknown or Global, anything may - // happen to them and thus we can't draw any conclusion. - if (!RetParamAttributes.empty()) - return FMRB_UnknownModRefBehavior; - - // Currently we don't (and can't) distinguish reads from writes in - // RetParamRelations. All we can say is whether there may be memory access or - // not. - if (RetParamRelations.empty()) - return FMRB_DoesNotAccessMemory; - - // Check if something beyond argmem gets touched. 
- bool AccessArgMemoryOnly = - std::all_of(RetParamRelations.begin(), RetParamRelations.end(), - [](const ExternalRelation &ExtRelation) { - // Both DerefLevels has to be 0, since we don't know which - // one is a read and which is a write. - return ExtRelation.From.DerefLevel == 0 && - ExtRelation.To.DerefLevel == 0; - }); - return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees - : FMRB_UnknownModRefBehavior; -} - -char CFLSteensAA::PassID; +AnalysisKey CFLSteensAA::Key; -CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) { +CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F)); } diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp index f6f30bb..054bdc4 100644 --- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -8,17 +8,506 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InstIterator.h" using namespace llvm; -// Explicit instantiations for the core proxy templates. +// Explicit template instantiations and specialization defininitions for core +// template typedefs. namespace llvm { -template class PassManager<LazyCallGraph::SCC>; -template class AnalysisManager<LazyCallGraph::SCC>; + +// Explicit instantiations for the core proxy templates. +template class AllAnalysesOn<LazyCallGraph::SCC>; +template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; +template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; template class OuterAnalysisManagerProxy<ModuleAnalysisManager, - LazyCallGraph::SCC>; -template class InnerAnalysisManagerProxy<FunctionAnalysisManager, - LazyCallGraph::SCC>; + LazyCallGraph::SCC, LazyCallGraph &>; template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; + +/// Explicitly specialize the pass manager run method to handle call graph +/// updates. +template <> +PreservedAnalyses +PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, + CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, + LazyCallGraph &G, CGSCCUpdateResult &UR) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + if (DebugLogging) + dbgs() << "Starting CGSCC pass manager run.\n"; + + // The SCC may be refined while we are running passes over it, so set up + // a pointer that we can update. + LazyCallGraph::SCC *C = &InitialC; + + for (auto &Pass : Passes) { + if (DebugLogging) + dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n"; + + PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR); + + // Update the SCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + + // Check that we didn't miss any update scenario. + assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // Update the analysis manager as each pass runs and potentially + // invalidates analyses. + AM.invalidate(*C, PassPA); + + // Finally, we intersect the final preserved analyses to compute the + // aggregate preserved set for this pass manager. 
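The "char ...::PassID" to "AnalysisKey ...::Key" substitutions that run through this patch are the new pass manager's way of identifying an analysis. As a reference point, here is a minimal sketch of a function analysis written against that scheme, assuming it is compiled in the same tree as this change; MyDummyAnalysis and its Result are invented names, not part of the patch.

// Minimal new-PM analysis skeleton, assuming the LLVM headers from this tree.
// "MyDummyAnalysis" and its Result are hypothetical, for illustration only.
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class MyDummyAnalysis : public AnalysisInfoMixin<MyDummyAnalysis> {
  friend AnalysisInfoMixin<MyDummyAnalysis>;
  static AnalysisKey Key; // replaces the old 'static char PassID'
public:
  struct Result {
    unsigned NumBlocks;
  };

  // The analysis manager caches one Result per Function and hands it back
  // from getResult<MyDummyAnalysis>(F) until it is invalidated.
  Result run(Function &F, FunctionAnalysisManager &AM) {
    return Result{static_cast<unsigned>(F.size())};
  }
};

AnalysisKey MyDummyAnalysis::Key;

} // end namespace llvm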
+ PA.intersect(std::move(PassPA)); + + // FIXME: Historically, the pass managers all called the LLVM context's + // yield function here. We don't have a generic way to acquire the + // context and it isn't yet clear what the right pattern is for yielding + // in the new pass manager so it is currently omitted. + // ...getContext().yield(); + } + + // Invaliadtion was handled after each pass in the above loop for the current + // SCC. Therefore, the remaining analysis results in the AnalysisManager are + // preserved. We mark this with a set so that we don't need to inspect each + // one individually. + PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); + + if (DebugLogging) + dbgs() << "Finished CGSCC pass manager run.\n"; + + return PA; +} + +bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( + Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv) { + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. + + // If this proxy or the call graph is going to be invalidated, we also need + // to clear all the keys coming from that analysis. + // + // We also directly invalidate the FAM's module proxy if necessary, and if + // that proxy isn't preserved we can't preserve this proxy either. We rely on + // it to handle module -> function analysis invalidation in the face of + // structural changes and so if it's unavailable we conservatively clear the + // entire SCC layer as well rather than trying to do invalidation ourselves. + auto PAC = PA.getChecker<CGSCCAnalysisManagerModuleProxy>(); + if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>()) || + Inv.invalidate<LazyCallGraphAnalysis>(M, PA) || + Inv.invalidate<FunctionAnalysisManagerModuleProxy>(M, PA)) { + InnerAM->clear(); + + // And the proxy itself should be marked as invalid so that we can observe + // the new call graph. This isn't strictly necessary because we cheat + // above, but is still useful. + return true; + } + + // Directly check if the relevant set is preserved so we can short circuit + // invalidating SCCs below. + bool AreSCCAnalysesPreserved = + PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>(); + + // Ok, we have a graph, so we can propagate the invalidation down into it. + for (auto &RC : G->postorder_ref_sccs()) + for (auto &C : RC) { + Optional<PreservedAnalyses> InnerPA; + + // Check to see whether the preserved set needs to be adjusted based on + // module-level analysis invalidation triggering deferred invalidation + // for this SCC. + if (auto *OuterProxy = + InnerAM->getCachedResult<ModuleAnalysisManagerCGSCCProxy>(C)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, M, PA)) { + if (!InnerPA) + InnerPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + InnerPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set. If so we'll need to run the inner + // invalidation. + if (InnerPA) { + InnerAM->invalidate(C, *InnerPA); + continue; + } + + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all SCC analyses. + if (!AreSCCAnalysesPreserved) + InnerAM->invalidate(C, PA); + } + + // Return false to indicate that this result is still a valid proxy. 
+ return false; +} + +template <> +CGSCCAnalysisManagerModuleProxy::Result +CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM) { + // Force the Function analysis manager to also be available so that it can + // be accessed in an SCC analysis and proxied onward to function passes. + // FIXME: It is pretty awkward to just drop the result here and assert that + // we can find it again later. + (void)AM.getResult<FunctionAnalysisManagerModuleProxy>(M); + + return Result(*InnerAM, AM.getResult<LazyCallGraphAnalysis>(M)); +} + +AnalysisKey FunctionAnalysisManagerCGSCCProxy::Key; + +FunctionAnalysisManagerCGSCCProxy::Result +FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG) { + // Collect the FunctionAnalysisManager from the Module layer and use that to + // build the proxy result. + // + // This allows us to rely on the FunctionAnalysisMangaerModuleProxy to + // invalidate the function analyses. + auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager(); + Module &M = *C.begin()->getFunction().getParent(); + auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M); + assert(FAMProxy && "The CGSCC pass manager requires that the FAM module " + "proxy is run on the module prior to entering the CGSCC " + "walk."); + + // Note that we special-case invalidation handling of this proxy in the CGSCC + // analysis manager's Module proxy. This avoids the need to do anything + // special here to recompute all of this if ever the FAM's module proxy goes + // away. + return Result(FAMProxy->getManager()); +} + +bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( + LazyCallGraph::SCC &C, const PreservedAnalyses &PA, + CGSCCAnalysisManager::Invalidator &Inv) { + for (LazyCallGraph::Node &N : C) + FAM->invalidate(N.getFunction(), PA); + + // This proxy doesn't need to handle invalidation itself. Instead, the + // module-level CGSCC proxy handles it above by ensuring that if the + // module-level FAM proxy becomes invalid the entire SCC layer, which + // includes this proxy, is cleared. + return false; +} + +} // End llvm namespace + +namespace { +/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c +/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly +/// added SCCs. +/// +/// The range of new SCCs must be in postorder already. The SCC they were split +/// out of must be provided as \p C. The current node being mutated and +/// triggering updates must be passed as \p N. +/// +/// This function returns the SCC containing \p N. This will be either \p C if +/// no new SCCs have been split out, or it will be the new SCC containing \p N. +template <typename SCCRangeT> +LazyCallGraph::SCC * +incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, + LazyCallGraph::Node &N, LazyCallGraph::SCC *C, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + bool DebugLogging = false) { + typedef LazyCallGraph::SCC SCC; + + if (NewSCCRange.begin() == NewSCCRange.end()) + return C; + + // Add the current SCC to the worklist as its shape has changed. + UR.CWorklist.insert(C); + if (DebugLogging) + dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"; + + SCC *OldC = C; + (void)OldC; + + // Update the current SCC. Note that if we have new SCCs, this must actually + // change the SCC. 
+ assert(C != &*NewSCCRange.begin() && + "Cannot insert new SCCs without changing current SCC!"); + C = &*NewSCCRange.begin(); + assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + + for (SCC &NewC : + reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { + assert(C != &NewC && "No need to re-visit the current SCC!"); + assert(OldC != &NewC && "Already handled the original SCC!"); + UR.CWorklist.insert(&NewC); + if (DebugLogging) + dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"; + } + return C; +} +} + +LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( + LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) { + typedef LazyCallGraph::Node Node; + typedef LazyCallGraph::Edge Edge; + typedef LazyCallGraph::SCC SCC; + typedef LazyCallGraph::RefSCC RefSCC; + + RefSCC &InitialRC = InitialC.getOuterRefSCC(); + SCC *C = &InitialC; + RefSCC *RC = &InitialRC; + Function &F = N.getFunction(); + + // Walk the function body and build up the set of retained, promoted, and + // demoted edges. + SmallVector<Constant *, 16> Worklist; + SmallPtrSet<Constant *, 16> Visited; + SmallPtrSet<Function *, 16> RetainedEdges; + SmallSetVector<Function *, 4> PromotedRefTargets; + SmallSetVector<Function *, 4> DemotedCallTargets; + + // First walk the function and handle all called functions. We do this first + // because if there is a single call edge, whether there are ref edges is + // irrelevant. + for (Instruction &I : instructions(F)) + if (auto CS = CallSite(&I)) + if (Function *Callee = CS.getCalledFunction()) + if (Visited.insert(Callee).second && !Callee->isDeclaration()) { + const Edge *E = N.lookup(*Callee); + // FIXME: We should really handle adding new calls. While it will + // make downstream usage more complex, there is no fundamental + // limitation and it will allow passes within the CGSCC to be a bit + // more flexible in what transforms they can do. Until then, we + // verify that new calls haven't been introduced. + assert(E && "No function transformations should introduce *new* " + "call edges! Any new calls should be modeled as " + "promoted existing ref edges!"); + RetainedEdges.insert(Callee); + if (!E->isCall()) + PromotedRefTargets.insert(Callee); + } + + // Now walk all references. + for (Instruction &I : instructions(F)) + for (Value *Op : I.operand_values()) + if (Constant *C = dyn_cast<Constant>(Op)) + if (Visited.insert(C).second) + Worklist.push_back(C); + + LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { + const Edge *E = N.lookup(Referee); + // FIXME: Similarly to new calls, we also currently preclude + // introducing new references. See above for details. + assert(E && "No function transformations should introduce *new* ref " + "edges! Any new ref edges would require IPO which " + "function passes aren't allowed to do!"); + RetainedEdges.insert(&Referee); + if (E->isCall()) + DemotedCallTargets.insert(&Referee); + }); + + // First remove all of the edges that are no longer present in this function. + // We have to build a list of dead targets first and then remove them as the + // data structures will all be invalidated by removing them. 
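The comment above describes an idiom used repeatedly in this function: edge removal invalidates the structures being walked, so the dead targets are gathered first and only removed afterwards. A stand-alone illustration of the same collect-then-mutate pattern on an ordinary container follows; the container contents and the "dead" predicate are invented for the example.

// Collect first, mutate second, so the walk never races the removal of the
// entries it is iterating over. Container and values are made up.
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, int> Edges = {
      {"callee_a", 1}, {"callee_b", 0}, {"callee_c", 1}};

  // Pass 1: record which keys to drop while the map is still intact.
  std::vector<std::string> Dead;
  for (const auto &KV : Edges)
    if (KV.second == 0)
      Dead.push_back(KV.first);

  // Pass 2: remove them, now that no live iteration depends on the entries.
  for (const auto &Key : Dead)
    Edges.erase(Key);

  for (const auto &KV : Edges)
    std::cout << KV.first << '\n'; // callee_a, callee_c
  return 0;
}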
+ SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets; + for (Edge &E : N) + if (!RetainedEdges.count(&E.getFunction())) + DeadTargets.push_back({E.getNode(), E.getKind()}); + for (auto DeadTarget : DeadTargets) { + Node &TargetN = *DeadTarget.getPointer(); + bool IsCall = DeadTarget.getInt() == Edge::Call; + SCC &TargetC = *G.lookupSCC(TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + if (&TargetRC != RC) { + RC->removeOutgoingEdge(N, TargetN); + if (DebugLogging) + dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN + << "'\n"; + continue; + } + if (DebugLogging) + dbgs() << "Deleting internal " << (IsCall ? "call" : "ref") + << " edge from '" << N << "' to '" << TargetN << "'\n"; + + if (IsCall) { + if (C != &TargetC) { + // For separate SCCs this is trivial. + RC->switchTrivialInternalEdgeToRef(N, TargetN); + } else { + // Otherwise we may end up re-structuring the call graph. First, + // invalidate any SCC analyses. We have to do this before we split + // functions into new SCCs and lose track of where their analyses are + // cached. + // FIXME: We should accept a more precise preserved set here. For + // example, it might be possible to preserve some function analyses + // even as the SCC structure is changed. + AM.invalidate(*C, PreservedAnalyses::none()); + // Now update the call graph. + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, + N, C, AM, UR, DebugLogging); + } + } + + auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN); + if (!NewRefSCCs.empty()) { + // Note that we don't bother to invalidate analyses as ref-edge + // connectivity is not really observable in any way and is intended + // exclusively to be used for ordering of transforms rather than for + // analysis conclusions. + + // The RC worklist is in reverse postorder, so we first enqueue the + // current RefSCC as it will remain the parent of all split RefSCCs, then + // we enqueue the new ones in RPO except for the one which contains the + // source node as that is the "bottom" we will continue processing in the + // bottom-up walk. + UR.RCWorklist.insert(RC); + if (DebugLogging) + dbgs() << "Enqueuing the existing RefSCC in the update worklist: " + << *RC << "\n"; + // Update the RC to the "bottom". + assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!"); + RC = &C->getOuterRefSCC(); + assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!"); + assert(NewRefSCCs.front() == RC && + "New current RefSCC not first in the returned list!"); + for (RefSCC *NewRC : reverse( + make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) { + assert(NewRC != RC && "Should not encounter the current RefSCC further " + "in the postorder list of new RefSCCs."); + UR.RCWorklist.insert(NewRC); + if (DebugLogging) + dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC + << "\n"; + } + } + } + + // Next demote all the call edges that are now ref edges. This helps make + // the SCCs small which should minimize the work below as we don't want to + // form cycles that this would break. + for (Function *RefTarget : DemotedCallTargets) { + Node &TargetN = *G.lookup(*RefTarget); + SCC &TargetC = *G.lookupSCC(TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + // The easy case is when the target RefSCC is not this RefSCC. This is + // only supported when the target RefSCC is a child of this RefSCC. 
+ if (&TargetRC != RC) { + assert(RC->isAncestorOf(TargetRC) && + "Cannot potentially form RefSCC cycles here!"); + RC->switchOutgoingEdgeToRef(N, TargetN); + if (DebugLogging) + dbgs() << "Switch outgoing call edge to a ref edge from '" << N + << "' to '" << TargetN << "'\n"; + continue; + } + + // We are switching an internal call edge to a ref edge. This may split up + // some SCCs. + if (C != &TargetC) { + // For separate SCCs this is trivial. + RC->switchTrivialInternalEdgeToRef(N, TargetN); + continue; + } + + // Otherwise we may end up re-structuring the call graph. First, invalidate + // any SCC analyses. We have to do this before we split functions into new + // SCCs and lose track of where their analyses are cached. + // FIXME: We should accept a more precise preserved set here. For example, + // it might be possible to preserve some function analyses even as the SCC + // structure is changed. + AM.invalidate(*C, PreservedAnalyses::none()); + // Now update the call graph. + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, + N, C, AM, UR, DebugLogging); + } + + // Now promote ref edges into call edges. + for (Function *CallTarget : PromotedRefTargets) { + Node &TargetN = *G.lookup(*CallTarget); + SCC &TargetC = *G.lookupSCC(TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + // The easy case is when the target RefSCC is not this RefSCC. This is + // only supported when the target RefSCC is a child of this RefSCC. + if (&TargetRC != RC) { + assert(RC->isAncestorOf(TargetRC) && + "Cannot potentially form RefSCC cycles here!"); + RC->switchOutgoingEdgeToCall(N, TargetN); + if (DebugLogging) + dbgs() << "Switch outgoing ref edge to a call edge from '" << N + << "' to '" << TargetN << "'\n"; + continue; + } + if (DebugLogging) + dbgs() << "Switch an internal ref edge to a call edge from '" << N + << "' to '" << TargetN << "'\n"; + + // Otherwise we are switching an internal ref edge to a call edge. This + // may merge away some SCCs, and we add those to the UpdateResult. We also + // need to make sure to update the worklist in the event SCCs have moved + // before the current one in the post-order sequence. + auto InitialSCCIndex = RC->find(*C) - RC->begin(); + auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN); + if (!InvalidatedSCCs.empty()) { + C = &TargetC; + assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + + // Any analyses cached for this SCC are no longer precise as the shape + // has changed by introducing this cycle. + AM.invalidate(*C, PreservedAnalyses::none()); + + for (SCC *InvalidatedC : InvalidatedSCCs) { + assert(InvalidatedC != C && "Cannot invalidate the current SCC!"); + UR.InvalidatedSCCs.insert(InvalidatedC); + + // Also clear any cached analyses for the SCCs that are dead. This + // isn't really necessary for correctness but can release memory. + AM.clear(*InvalidatedC); + } + } + auto NewSCCIndex = RC->find(*C) - RC->begin(); + if (InitialSCCIndex < NewSCCIndex) { + // Put our current SCC back onto the worklist as we'll visit other SCCs + // that are now definitively ordered prior to the current one in the + // post-order sequence, and may end up observing more precise context to + // optimize the current SCC. + UR.CWorklist.insert(C); + if (DebugLogging) + dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n"; + // Enqueue in reverse order as we pop off the back of the worklist. 
+ for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex, + RC->begin() + NewSCCIndex))) { + UR.CWorklist.insert(&MovedC); + if (DebugLogging) + dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC + << "\n"; + } + } + } + + assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!"); + assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!"); + assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!"); + + // Record the current RefSCC and SCC for higher layers of the CGSCC pass + // manager now that all the updates have been applied. + if (RC != &InitialRC) + UR.UpdatedRC = RC; + if (C != &InitialC) + UR.UpdatedC = C; + + return *C; } diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index 39cb86d..458b7bf 100644 --- a/contrib/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -258,10 +258,10 @@ void CallGraphNode::replaceCallEdge(CallSite CS, } // Provide an explicit template instantiation for the static ID. -char CallGraphAnalysis::PassID; +AnalysisKey CallGraphAnalysis::Key; PreservedAnalyses CallGraphPrinterPass::run(Module &M, - AnalysisManager<Module> &AM) { + ModuleAnalysisManager &AM) { AM.getResult<CallGraphAnalysis>(M).print(OS); return PreservedAnalyses::all(); } diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index 69d7673..9cef781 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -67,9 +67,7 @@ public: Info.setPreservesAll(); } - const char *getPassName() const override { - return "CallGraph Pass Manager"; - } + StringRef getPassName() const override { return "CallGraph Pass Manager"; } PMDataManager *getAsPMDataManager() override { return this; } Pass *getAsPass() override { return this; } @@ -100,7 +98,7 @@ private: bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, CallGraph &CG, bool &CallGraphUpToDate, bool &DevirtualizedCall); - bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, + bool RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool IsCheckingMode); }; @@ -175,8 +173,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, /// a function pass like GVN optimizes away stuff feeding the indirect call. /// This never happens in checking mode. /// -bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, - CallGraph &CG, bool CheckingMode) { +bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, + bool CheckingMode) { DenseMap<Value*, CallGraphNode*> CallSites; DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() @@ -450,7 +448,7 @@ bool CGPassManager::runOnModule(Module &M) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. 
const std::vector<CallGraphNode *> &NodeVec = *CGI; - CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size()); + CurSCC.initialize(NodeVec); ++CGI; // At the top level, we run all the passes in this pass manager on the diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index ed83704..bdffdd8 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -27,36 +27,45 @@ using namespace llvm; -static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet, - SmallPtrSetImpl<const Value*> &EphValues) { - SmallPtrSet<const Value *, 32> Visited; - - // Make sure that all of the items in WorkSet are in our EphValues set. - EphValues.insert(WorkSet.begin(), WorkSet.end()); +static void +appendSpeculatableOperands(const Value *V, + SmallPtrSetImpl<const Value *> &Visited, + SmallVectorImpl<const Value *> &Worklist) { + const User *U = dyn_cast<User>(V); + if (!U) + return; + + for (const Value *Operand : U->operands()) + if (Visited.insert(Operand).second) + if (isSafeToSpeculativelyExecute(Operand)) + Worklist.push_back(Operand); +} +static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited, + SmallVectorImpl<const Value *> &Worklist, + SmallPtrSetImpl<const Value *> &EphValues) { // Note: We don't speculate PHIs here, so we'll miss instruction chains kept // alive only by ephemeral values. - while (!WorkSet.empty()) { - const Value *V = WorkSet.front(); - WorkSet.erase(WorkSet.begin()); + // Walk the worklist using an index but without caching the size so we can + // append more entries as we process the worklist. This forms a queue without + // quadratic behavior by just leaving processed nodes at the head of the + // worklist forever. + for (int i = 0; i < (int)Worklist.size(); ++i) { + const Value *V = Worklist[i]; - if (!Visited.insert(V).second) - continue; + assert(Visited.count(V) && + "Failed to add a worklist entry to our visited set!"); // If all uses of this value are ephemeral, then so is this value. - if (!std::all_of(V->user_begin(), V->user_end(), - [&](const User *U) { return EphValues.count(U); })) + if (!all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) continue; EphValues.insert(V); DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); - if (const User *U = dyn_cast<User>(V)) - for (const Value *J : U->operands()) { - if (isSafeToSpeculativelyExecute(J)) - WorkSet.push_back(J); - } + // Append any more operands to consider. + appendSpeculatableOperands(V, Visited, Worklist); } } @@ -64,29 +73,32 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet, void CodeMetrics::collectEphemeralValues( const Loop *L, AssumptionCache *AC, SmallPtrSetImpl<const Value *> &EphValues) { - SmallVector<const Value *, 16> WorkSet; + SmallPtrSet<const Value *, 32> Visited; + SmallVector<const Value *, 16> Worklist; for (auto &AssumeVH : AC->assumptions()) { if (!AssumeVH) continue; Instruction *I = cast<Instruction>(AssumeVH); - // Filter out call sites outside of the loop so we don't to a function's + // Filter out call sites outside of the loop so we don't do a function's // worth of work for each of its loops (and, in the common case, ephemeral // values in the loop are likely due to @llvm.assume calls in the loop). 
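The rewritten completeEphemeralValues above walks its worklist by index and appends to the same vector as it goes, which gives queue behaviour without repeatedly erasing from the front. Below is a small self-contained sketch of that idiom doing a breadth-first walk over a toy adjacency list; the graph data is illustrative only.

// Worklist-as-queue: iterate by index and append during the walk, leaving
// processed entries in place. Toy graph; the data is invented.
#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

int main() {
  // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};

  std::set<int> Visited = {0};
  std::vector<int> Worklist = {0};

  // Do not cache Worklist.size(): it grows as neighbours are discovered.
  for (size_t I = 0; I < Worklist.size(); ++I) {
    int N = Worklist[I];
    std::cout << "visit " << N << '\n';
    for (int S : Succs[N])
      if (Visited.insert(S).second)
        Worklist.push_back(S);
  }
  return 0;
}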
if (!L->contains(I->getParent())) continue; - WorkSet.push_back(I); + if (EphValues.insert(I).second) + appendSpeculatableOperands(I, Visited, Worklist); } - completeEphemeralValues(WorkSet, EphValues); + completeEphemeralValues(Visited, Worklist, EphValues); } void CodeMetrics::collectEphemeralValues( const Function *F, AssumptionCache *AC, SmallPtrSetImpl<const Value *> &EphValues) { - SmallVector<const Value *, 16> WorkSet; + SmallPtrSet<const Value *, 32> Visited; + SmallVector<const Value *, 16> Worklist; for (auto &AssumeVH : AC->assumptions()) { if (!AssumeVH) @@ -94,17 +106,19 @@ void CodeMetrics::collectEphemeralValues( Instruction *I = cast<Instruction>(AssumeVH); assert(I->getParent()->getParent() == F && "Found assumption for the wrong function!"); - WorkSet.push_back(I); + + if (EphValues.insert(I).second) + appendSpeculatableOperands(I, Visited, Worklist); } - completeEphemeralValues(WorkSet, EphValues); + completeEphemeralValues(Visited, Worklist, EphValues); } /// Fill in the current structure with information gleaned from the specified /// block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, - SmallPtrSetImpl<const Value*> &EphValues) { + const SmallPtrSetImpl<const Value*> &EphValues) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index c9adaa7..7386727 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -17,29 +17,38 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cerrno> #include <cfenv> #include <cmath> -#include <limits> +#include <cstddef> +#include <cstdint> using namespace llvm; @@ -49,6 +58,36 @@ namespace { // Constant Folding internal helper functions //===----------------------------------------------------------------------===// +static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, + Constant *C, Type *SrcEltTy, + unsigned NumSrcElts, + const DataLayout &DL) { + // Now that we know that the input value is a vector of integers, just shift + // and insert them into our result. 
+ unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy); + for (unsigned i = 0; i != NumSrcElts; ++i) { + Constant *Element; + if (DL.isLittleEndian()) + Element = C->getAggregateElement(NumSrcElts - i - 1); + else + Element = C->getAggregateElement(i); + + if (Element && isa<UndefValue>(Element)) { + Result <<= BitShift; + continue; + } + + auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); + if (!ElementCI) + return ConstantExpr::getBitCast(C, DestTy); + + Result <<= BitShift; + Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth()); + } + + return nullptr; +} + /// Constant fold bitcast, symbolically evaluating it with DataLayout. /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. @@ -60,45 +99,33 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! return Constant::getAllOnesValue(DestTy); - // Handle a vector->integer cast. - if (auto *IT = dyn_cast<IntegerType>(DestTy)) { - auto *VTy = dyn_cast<VectorType>(C->getType()); - if (!VTy) - return ConstantExpr::getBitCast(C, DestTy); + if (auto *VTy = dyn_cast<VectorType>(C->getType())) { + // Handle a vector->scalar integer/fp cast. + if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) { + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); + + // If the vector is a vector of floating point, convert it to vector of int + // to simplify things. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); + // Ask IR to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + } - unsigned NumSrcElts = VTy->getNumElements(); - Type *SrcEltTy = VTy->getElementType(); - - // If the vector is a vector of floating point, convert it to vector of int - // to simplify things. - if (SrcEltTy->isFloatingPointTy()) { - unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - Type *SrcIVTy = - VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); - // Ask IR to do the conversion now that #elts line up. - C = ConstantExpr::getBitCast(C, SrcIVTy); - } + APInt Result(DL.getTypeSizeInBits(DestTy), 0); + if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C, + SrcEltTy, NumSrcElts, DL)) + return CE; - // Now that we know that the input value is a vector of integers, just shift - // and insert them into our result. - unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy); - APInt Result(IT->getBitWidth(), 0); - for (unsigned i = 0; i != NumSrcElts; ++i) { - Constant *Element; - if (DL.isLittleEndian()) - Element = C->getAggregateElement(NumSrcElts-i-1); - else - Element = C->getAggregateElement(i); - - auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); - if (!ElementCI) - return ConstantExpr::getBitCast(C, DestTy); + if (isa<IntegerType>(DestTy)) + return ConstantInt::get(DestTy, Result); - Result <<= BitShift; - Result |= ElementCI->getValue().zextOrSelf(IT->getBitWidth()); + APFloat FP(DestTy->getFltSemantics(), Result); + return ConstantFP::get(DestTy->getContext(), FP); } - - return ConstantInt::get(IT, Result); } // The code below only handles casts to vectors currently. @@ -180,7 +207,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { Constant *Elt = Zero; unsigned ShiftAmt = isLittleEndian ? 
0 : SrcBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { - Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++)); + Constant *Src = C->getAggregateElement(SrcElt++); + if (Src && isa<UndefValue>(Src)) + Src = Constant::getNullValue(C->getType()->getVectorElementType()); + else + Src = dyn_cast_or_null<ConstantInt>(Src); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); @@ -206,8 +237,19 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { - auto *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i)); - if (!Src) // Reject constantexpr elements. + auto *Element = C->getAggregateElement(i); + + if (!Element) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + if (isa<UndefValue>(Element)) { + // Correctly Propagate undef values. + Result.append(Ratio, UndefValue::get(DstEltTy)); + continue; + } + + auto *Src = dyn_cast<ConstantInt>(Element); + if (!Src) return ConstantExpr::getBitCast(C, DestTy); unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); @@ -333,7 +375,7 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, uint64_t CurEltOffset = SL->getElementOffset(Index); ByteOffset -= CurEltOffset; - while (1) { + while (true) { // If the element access is to the element itself and not to tail padding, // read the bytes from the element. uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType()); @@ -689,23 +731,27 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, - Type *ResultTy, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + Type *ResultTy, Optional<unsigned> InRangeIndex, + const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntPtrTy = DL.getIntPtrType(ResultTy); + Type *IntPtrScalarTy = IntPtrTy->getScalarType(); bool Any = false; SmallVector<Constant*, 32> NewIdxs; for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || - !isa<StructType>(GetElementPtrInst::getIndexedType(SrcElemTy, - Ops.slice(1, i - 1)))) && - Ops[i]->getType() != IntPtrTy) { + !isa<StructType>(GetElementPtrInst::getIndexedType( + SrcElemTy, Ops.slice(1, i - 1)))) && + Ops[i]->getType()->getScalarType() != IntPtrScalarTy) { Any = true; + Type *NewType = Ops[i]->getType()->isVectorTy() + ? 
IntPtrTy + : IntPtrTy->getScalarType(); NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], true, - IntPtrTy, + NewType, true), - Ops[i], IntPtrTy)); + Ops[i], NewType)); } else NewIdxs.push_back(Ops[i]); } @@ -713,11 +759,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs); - if (auto *CE = dyn_cast<ConstantExpr>(C)) { - if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) - C = Folded; - } + Constant *C = ConstantExpr::getGetElementPtr( + SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex); + if (Constant *Folded = ConstantFoldConstant(C, DL, TLI)) + C = Folded; return C; } @@ -744,13 +789,17 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, ArrayRef<Constant *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI) { + const GEPOperator *InnermostGEP = GEP; + bool InBounds = GEP->isInBounds(); + Type *SrcElemTy = GEP->getSourceElementType(); Type *ResElemTy = GEP->getResultElementType(); Type *ResTy = GEP->getType(); if (!SrcElemTy->isSized()) return nullptr; - if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, DL, TLI)) + if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, + GEP->getInRangeIndex(), DL, TLI)) return C; Constant *Ptr = Ops[0]; @@ -775,8 +824,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); Res = ConstantExpr::getSub(Res, CE->getOperand(1)); Res = ConstantExpr::getIntToPtr(Res, ResTy); - if (auto *ResCE = dyn_cast<ConstantExpr>(Res)) - Res = ConstantFoldConstantExpression(ResCE, DL, TLI); + if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) + Res = FoldedRes; return Res; } } @@ -793,6 +842,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // If this is a GEP of a GEP, fold it all into a single GEP. while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { + InnermostGEP = GEP; + InBounds &= GEP->isInBounds(); + SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end()); // Do not try the incorporate the sub-GEP if some index is not a number. @@ -821,7 +873,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, } } - if (Ptr->isNullValue() || BasePtr != 0) { + auto *PTy = cast<PointerType>(Ptr->getType()); + if ((Ptr->isNullValue() || BasePtr != 0) && + !DL.isNonIntegralPointerType(PTy)) { Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); return ConstantExpr::getIntToPtr(C, ResTy); } @@ -830,8 +884,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // we eliminate over-indexing of the notional static type array bounds. // This makes it easy to determine if the getelementptr is "inbounds". // Also, this helps GlobalOpt do SROA on GlobalVariables. - Type *Ty = Ptr->getType(); - assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); + Type *Ty = PTy; SmallVector<Constant *, 32> NewIdxs; do { @@ -897,8 +950,23 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (Offset != 0) return nullptr; + // Preserve the inrange index from the innermost GEP if possible. We must + // have calculated the same indices up to and including the inrange index. 
+ Optional<unsigned> InRangeIndex; + if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex()) + if (SrcElemTy == InnermostGEP->getSourceElementType() && + NewIdxs.size() > *LastIRIndex) { + InRangeIndex = LastIRIndex; + for (unsigned I = 0; I <= *LastIRIndex; ++I) + if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) { + InRangeIndex = None; + break; + } + } + // Create a GEP. - Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs, + InBounds, InRangeIndex); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -916,15 +984,16 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, /// attempting to fold instructions like loads and stores, which have no /// constant expression form. /// -/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc -/// information, due to only being passed an opcode and operands. Constant +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange +/// etc information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. /// -Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy, - unsigned Opcode, +Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, ArrayRef<Constant *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *DestTy = InstOrCE->getType(); + // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); @@ -936,10 +1005,14 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy, if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI)) return C; - return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), - Ops[0], Ops.slice(1)); + return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0], + Ops.slice(1), GEP->isInBounds(), + GEP->getInRangeIndex()); } + if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) + return CE->getWithOperands(Ops); + switch (Opcode) { default: return nullptr; case Instruction::ICmp: @@ -966,12 +1039,58 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy, // Constant Folding public APIs //===----------------------------------------------------------------------===// +namespace { + +Constant * +ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, + const TargetLibraryInfo *TLI, + SmallDenseMap<Constant *, Constant *> &FoldedOps) { + if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C)) + return nullptr; + + SmallVector<Constant *, 8> Ops; + for (const Use &NewU : C->operands()) { + auto *NewC = cast<Constant>(&NewU); + // Recursively fold the ConstantExpr's operands. If we have already folded + // a ConstantExpr, we don't have to process it again. 
+ if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) { + auto It = FoldedOps.find(NewC); + if (It == FoldedOps.end()) { + if (auto *FoldedC = + ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) { + NewC = FoldedC; + FoldedOps.insert({NewC, FoldedC}); + } else { + FoldedOps.insert({NewC, NewC}); + } + } else { + NewC = It->second; + } + } + Ops.push_back(NewC); + } + + if (auto *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->isCompare()) + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + DL, TLI); + + return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI); + } + + assert(isa<ConstantVector>(C)); + return ConstantVector::get(Ops); +} + +} // end anonymous namespace + Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (auto *PN = dyn_cast<PHINode>(I)) { Constant *CommonValue = nullptr; + SmallDenseMap<Constant *, Constant *> FoldedOps; for (Value *Incoming : PN->incoming_values()) { // If the incoming value is undef then skip it. Note that while we could // skip the value if it is equal to the phi node itself we choose not to @@ -984,8 +1103,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, if (!C) return nullptr; // Fold the PHI's operands. - if (auto *NewC = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(NewC, DL, TLI); + if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps)) + C = FoldedC; // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) @@ -993,7 +1112,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, CommonValue = C; } - // If we reach here, all incoming values are the same constant or undef. return CommonValue ? CommonValue : UndefValue::get(PN->getType()); } @@ -1003,12 +1121,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); })) return nullptr; + SmallDenseMap<Constant *, Constant *> FoldedOps; SmallVector<Constant *, 8> Ops; for (const Use &OpU : I->operands()) { auto *Op = cast<Constant>(&OpU); // Fold the Instruction's operands. - if (auto *NewCE = dyn_cast<ConstantExpr>(Op)) - Op = ConstantFoldConstantExpression(NewCE, DL, TLI); + if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps)) + Op = FoldedOp; Ops.push_back(Op); } @@ -1036,55 +1155,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, return ConstantFoldInstOperands(I, Ops, DL, TLI); } -namespace { - -Constant * -ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL, - const TargetLibraryInfo *TLI, - SmallPtrSetImpl<ConstantExpr *> &FoldedOps) { - SmallVector<Constant *, 8> Ops; - for (const Use &NewU : CE->operands()) { - auto *NewC = cast<Constant>(&NewU); - // Recursively fold the ConstantExpr's operands. If we have already folded - // a ConstantExpr, we don't have to process it again. 
- if (auto *NewCE = dyn_cast<ConstantExpr>(NewC)) { - if (FoldedOps.insert(NewCE).second) - NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps); - } - Ops.push_back(NewC); - } - - if (CE->isCompare()) - return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], - DL, TLI); - - return ConstantFoldInstOperandsImpl(CE, CE->getType(), CE->getOpcode(), Ops, - DL, TLI); -} - -} // end anonymous namespace - -Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const DataLayout &DL, - const TargetLibraryInfo *TLI) { - SmallPtrSet<ConstantExpr *, 4> FoldedOps; - return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps); +Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + SmallDenseMap<Constant *, Constant *> FoldedOps; + return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); } Constant *llvm::ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI) { - return ConstantFoldInstOperandsImpl(I, I->getType(), I->getOpcode(), Ops, DL, - TLI); -} - -Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, - ArrayRef<Constant *> Ops, - const DataLayout &DL, - const TargetLibraryInfo *TLI) { - assert(Opcode != Instruction::GetElementPtr && "Invalid for GEPs"); - return ConstantFoldInstOperandsImpl(nullptr, DestTy, Opcode, Ops, DL, TLI); + return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); } Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, @@ -1350,6 +1431,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) { Name == "log10f"; case 'p': return Name == "pow" || Name == "powf"; + case 'r': + return Name == "round" || Name == "roundf"; case 's': return Name == "sin" || Name == "sinh" || Name == "sqrt" || Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; @@ -1364,7 +1447,7 @@ Constant *GetConstantFoldFPValue(double V, Type *Ty) { if (Ty->isHalfTy()) { APFloat APF(V); bool unused; - APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused); return ConstantFP::get(Ty->getContext(), APF); } if (Ty->isFloatTy()) @@ -1455,7 +1538,7 @@ double getValueAsDouble(ConstantFP *Op) { bool unused; APFloat APF = Op->getValueAPF(); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused); return APF.convertToDouble(); } @@ -1473,7 +1556,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, APFloat Val(Op->getValueAPF()); bool lost = false; - Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); } @@ -1614,6 +1697,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, } } break; + case 'r': + if ((Name == "round" && TLI->has(LibFunc::round)) || + (Name == "roundf" && TLI->has(LibFunc::roundf))) + return ConstantFoldFP(round, V, Ty); case 's': if ((Name == "sin" && TLI->has(LibFunc::sin)) || (Name == "sinf" && TLI->has(LibFunc::sinf))) @@ -1648,7 +1735,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, case Intrinsic::bitreverse: return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); case Intrinsic::convert_from_fp16: { - APFloat 
Val(APFloat::IEEEhalf, Op->getValue()); + APFloat Val(APFloat::IEEEhalf(), Op->getValue()); bool lost = false; APFloat::opStatus status = Val.convert( @@ -1927,3 +2014,152 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI); } + +bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { + // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap + // (and to some extent ConstantFoldScalarCall). + Function *F = CS.getCalledFunction(); + if (!F) + return false; + + LibFunc::Func Func; + if (!TLI || !TLI->getLibFunc(*F, Func)) + return false; + + if (CS.getNumArgOperands() == 1) { + if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) { + const APFloat &Op = OpC->getValueAPF(); + switch (Func) { + case LibFunc::logl: + case LibFunc::log: + case LibFunc::logf: + case LibFunc::log2l: + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log10l: + case LibFunc::log10: + case LibFunc::log10f: + return Op.isNaN() || (!Op.isZero() && !Op.isNegative()); + + case LibFunc::expl: + case LibFunc::exp: + case LibFunc::expf: + // FIXME: These boundaries are slightly conservative. + if (OpC->getType()->isDoubleTy()) + return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan && + Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan; + if (OpC->getType()->isFloatTy()) + return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan && + Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan; + break; + + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + // FIXME: These boundaries are slightly conservative. + if (OpC->getType()->isDoubleTy()) + return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan && + Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan; + if (OpC->getType()->isFloatTy()) + return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan && + Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan; + break; + + case LibFunc::sinl: + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::cosl: + case LibFunc::cos: + case LibFunc::cosf: + return !Op.isInfinity(); + + case LibFunc::tanl: + case LibFunc::tan: + case LibFunc::tanf: { + // FIXME: Stop using the host math library. + // FIXME: The computation isn't done in the right precision. + Type *Ty = OpC->getType(); + if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) { + double OpV = getValueAsDouble(OpC); + return ConstantFoldFP(tan, OpV, Ty) != nullptr; + } + break; + } + + case LibFunc::asinl: + case LibFunc::asin: + case LibFunc::asinf: + case LibFunc::acosl: + case LibFunc::acos: + case LibFunc::acosf: + return Op.compare(APFloat(Op.getSemantics(), "-1")) != + APFloat::cmpLessThan && + Op.compare(APFloat(Op.getSemantics(), "1")) != + APFloat::cmpGreaterThan; + + case LibFunc::sinh: + case LibFunc::cosh: + case LibFunc::sinhf: + case LibFunc::coshf: + case LibFunc::sinhl: + case LibFunc::coshl: + // FIXME: These boundaries are slightly conservative. 
+ if (OpC->getType()->isDoubleTy()) + return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan && + Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan; + if (OpC->getType()->isFloatTy()) + return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan && + Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan; + break; + + case LibFunc::sqrtl: + case LibFunc::sqrt: + case LibFunc::sqrtf: + return Op.isNaN() || Op.isZero() || !Op.isNegative(); + + // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p, + // maybe others? + default: + break; + } + } + } + + if (CS.getNumArgOperands() == 2) { + ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0)); + ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1)); + if (Op0C && Op1C) { + const APFloat &Op0 = Op0C->getValueAPF(); + const APFloat &Op1 = Op1C->getValueAPF(); + + switch (Func) { + case LibFunc::powl: + case LibFunc::pow: + case LibFunc::powf: { + // FIXME: Stop using the host math library. + // FIXME: The computation isn't done in the right precision. + Type *Ty = Op0C->getType(); + if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) { + if (Ty == Op1C->getType()) { + double Op0V = getValueAsDouble(Op0C); + double Op1V = getValueAsDouble(Op1C); + return ConstantFoldBinaryFP(pow, Op0V, Op1V, Ty) != nullptr; + } + } + break; + } + + case LibFunc::fmodl: + case LibFunc::fmod: + case LibFunc::fmodf: + return Op0.isNaN() || Op1.isNaN() || + (!Op0.isInfinity() && !Op1.isZero()); + + default: + break; + } + } + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 68a4bea..6b773979 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -89,14 +90,35 @@ CostModelAnalysis::runOnFunction(Function &F) { return false; } -static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { +static bool isReverseVectorMask(ArrayRef<int> Mask) { for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) - if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) return false; return true; } -static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { +static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { + bool Vec0 = false; + bool Vec1 = false; + for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { + if (Mask[i] >= 0) { + if ((unsigned)Mask[i] >= NumVecElts) + Vec1 = true; + else + Vec0 = true; + } + } + return !(Vec0 && Vec1); +} + +static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { + for (unsigned i = 0; i < Mask.size(); ++i) + if (Mask[i] > 0) + return false; + return true; +} + +static bool isAlternateVectorMask(ArrayRef<int> Mask) { bool isAlternate = true; unsigned MaskSize = Mask.size(); @@ -123,7 +145,7 @@ static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { TargetTransformInfo::OperandValueKind OpInfo = - TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OK_AnyValue; // Check for a splat of a constant or for a non uniform vector of constants. 
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { @@ -132,6 +154,12 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { OpInfo = TargetTransformInfo::OK_UniformConstantValue; } + // Check for a splat of a uniform value. This is not loop aware, so return + // true only for the obviously uniform cases (argument, globalvalue) + const Value *Splat = getSplatValue(V); + if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) + OpInfo = TargetTransformInfo::OK_UniformValue; + return OpInfo; } @@ -410,8 +438,11 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { getOperandInfo(I->getOperand(0)); TargetTransformInfo::OperandValueKind Op2VK = getOperandInfo(I->getOperand(1)); + SmallVector<const Value*, 2> Operands(I->operand_values()); return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK); + Op2VK, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, + Operands); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); @@ -494,6 +525,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (isAlternateVectorMask(Mask)) return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTypOp0, 0, nullptr); + + if (isZeroEltBroadcastVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, + VecTypOp0, 0, nullptr); + + if (isSingleSourceVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + VecTypOp0, 0, nullptr); + + return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + VecTypOp0, 0, nullptr); } return -1; diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index a3f8b7f..688c1db 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -280,10 +280,8 @@ void DemandedBits::performAnalysis() { // add their operands to the work list (for integer values operands, mark // all bits as live). 
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { - if (!AliveBits.count(&I)) { - AliveBits[&I] = APInt(IT->getBitWidth(), 0); + if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second) Worklist.push_back(&I); - } continue; } @@ -363,8 +361,9 @@ APInt DemandedBits::getDemandedBits(Instruction *I) { performAnalysis(); const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); - if (AliveBits.count(I)) - return AliveBits[I]; + auto Found = AliveBits.find(I); + if (Found != AliveBits.end()) + return Found->second; return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType())); } @@ -387,10 +386,10 @@ FunctionPass *llvm::createDemandedBitsWrapperPass() { return new DemandedBitsWrapperPass(); } -char DemandedBitsAnalysis::PassID; +AnalysisKey DemandedBitsAnalysis::Key; DemandedBits DemandedBitsAnalysis::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); return DemandedBits(F, AC, DT); diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index eb4d925..a332a07 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -122,7 +122,7 @@ DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { return DependenceInfo(&F, &AA, &SE, &LI); } -char DependenceAnalysis::PassID; +AnalysisKey DependenceAnalysis::Key; INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", true, true) diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index 4554374..15856c3 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const { } #endif -char DominanceFrontierAnalysis::PassID; +AnalysisKey DominanceFrontierAnalysis::Key; DominanceFrontier DominanceFrontierAnalysis::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp index 5f951f5..ebf0a37 100644 --- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp +++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp @@ -40,6 +40,29 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { .Default(EHPersonality::Unknown); } +StringRef llvm::getEHPersonalityName(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::GNU_Ada: return "__gnat_eh_personality"; + case EHPersonality::GNU_CXX: return "__gxx_personality_v0"; + case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0"; + case EHPersonality::GNU_C: return "__gcc_personality_v0"; + case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0"; + case EHPersonality::GNU_ObjC: return "__objc_personality_v0"; + case EHPersonality::MSVC_X86SEH: return "_except_handler3"; + case EHPersonality::MSVC_Win64SEH: return "__C_specific_handler"; + case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3"; + case EHPersonality::CoreCLR: return "ProcessCLRException"; + case EHPersonality::Rust: return "rust_eh_personality"; + case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!"); + } + + llvm_unreachable("Invalid EHPersonality!"); +} + +EHPersonality llvm::getDefaultEHPersonality(const Triple &T) { + return EHPersonality::GNU_C; +} + bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { 
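  // Illustrative sketch only (assumed client code, not part of this change):
  // the getEHPersonalityName helper added above maps a classified personality
  // back to its runtime symbol name, which can be useful for diagnostics.
  // raw_ostream output via errs() is assumed to be available.
  if (F->hasPersonalityFn()) {
    EHPersonality Pers = classifyEHPersonality(F->getPersonalityFn());
    if (Pers != EHPersonality::Unknown)
      errs() << "personality: " << getEHPersonalityName(Pers) << "\n";
  }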
EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); // We can't simplify any invokes to nounwind functions if the personality @@ -82,7 +105,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) { } // Note that this is a member of the given color. ColorVector &Colors = BlockColors[Visiting]; - if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end()) + if (!is_contained(Colors, Color)) Colors.push_back(Color); else continue; diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index a7d1e04..33f00cb 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -78,7 +78,7 @@ class GlobalsAAResult::FunctionInfo { return (AlignedMap *)P; } enum { NumLowBitsAvailable = 3 }; - static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable), + static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable), "AlignedMap insufficiently aligned to have enough low bits."); }; @@ -366,6 +366,10 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return true; // Allow comparison against null. + } else if (Constant *C = dyn_cast<Constant>(I)) { + // Ignore constants which don't have any live uses. + if (isa<GlobalValue>(C) || C->isConstantUsed()) + return true; } else { return true; } @@ -521,7 +525,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // Can't say anything about it. However, if it is inside our SCC, // then nothing needs to be done. CallGraphNode *CalleeNode = CG[Callee]; - if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + if (!is_contained(SCC, CalleeNode)) KnowNothing = true; } } else { @@ -857,22 +861,22 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, if (CS.doesNotAccessMemory()) return MRI_NoModRef; ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef; - + // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. for (auto &A : CS.args()) { SmallVector<Value*, 4> Objects; GetUnderlyingObjects(A, Objects, DL); - + // All objects must be identified. - if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject) && + if (!all_of(Objects, isIdentifiedObject) && // Try ::alias to see if all objects are known not to alias GV. 
- !std::all_of(Objects.begin(), Objects.end(), [&](Value *V) { + !all_of(Objects, [&](Value *V) { return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias; - })) + })) return ConservativeResult; - if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end()) + if (is_contained(Objects, GV)) return ConservativeResult; } @@ -937,9 +941,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, return Result; } -char GlobalsAA::PassID; +AnalysisKey GlobalsAA::Key; -GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> &AM) { +GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { return GlobalsAAResult::analyzeModule(M, AM.getResult<TargetLibraryAnalysis>(M), AM.getResult<CallGraphAnalysis>(M)); diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index 43c0ba1..a661b01 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -16,8 +16,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -34,22 +34,11 @@ using namespace llvm; #define DEBUG_TYPE "iv-users" -char IVUsersAnalysis::PassID; +AnalysisKey IVUsersAnalysis::Key; -IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - return IVUsers(&L, FAM.getCachedResult<AssumptionAnalysis>(*F), - FAM.getCachedResult<LoopAnalysis>(*F), - FAM.getCachedResult<DominatorTreeAnalysis>(*F), - FAM.getCachedResult<ScalarEvolutionAnalysis>(*F)); -} - -PreservedAnalyses IVUsersPrinterPass::run(Loop &L, AnalysisManager<Loop> &AM) { - AM.getResult<IVUsersAnalysis>(L).print(OS); - return PreservedAnalyses::all(); +IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return IVUsers(&L, &AR.AC, &AR.LI, &AR.DT, &AR.SE); } char IVUsersWrapperPass::ID = 0; diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index dcb724a..4109049 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -40,18 +40,7 @@ using namespace llvm; STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); -// Threshold to use when optsize is specified (and there is no -// -inline-threshold). -const int OptSizeThreshold = 75; - -// Threshold to use when -Oz is specified (and there is no -inline-threshold). -const int OptMinSizeThreshold = 25; - -// Threshold to use when -O[34] is specified (and there is no -// -inline-threshold). 
-const int OptAggressiveThreshold = 275; - -static cl::opt<int> DefaultInlineThreshold( +static cl::opt<int> InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -66,6 +55,11 @@ static cl::opt<int> ColdThreshold( "inlinecold-threshold", cl::Hidden, cl::init(225), cl::desc("Threshold for inlining functions with cold attribute")); +static cl::opt<int> + HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), + cl::ZeroOrMore, + cl::desc("Threshold for hot callsites ")); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -75,20 +69,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; - /// The cache of @llvm.assume intrinsics. - AssumptionCacheTracker *ACT; + /// Getter for the cache of @llvm.assume intrinsics. + std::function<AssumptionCache &(Function &)> &GetAssumptionCache; /// Profile summary information. ProfileSummaryInfo *PSI; - // The called function. + /// The called function. Function &F; - // The candidate callsite being analyzed. Please do not use this to do - // analysis in the caller function; we want the inline cost query to be - // easily cacheable. Instead, use the cover function paramHasAttr. + /// The candidate callsite being analyzed. Please do not use this to do + /// analysis in the caller function; we want the inline cost query to be + /// easily cacheable. Instead, use the cover function paramHasAttr. CallSite CandidateCS; + /// Tunable parameters that control the analysis. + const InlineParams &Params; + int Threshold; int Cost; @@ -107,25 +104,25 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. + /// While we walk the potentially-inlined instructions, we build up and + /// maintain a mapping of simplified values specific to this callsite. The + /// idea is to propagate any special information we have about arguments to + /// this call through the inlinable section of the function, and account for + /// likely simplifications post-inlining. The most important aspect we track + /// is CFG altering simplifications -- when we prove a basic block dead, that + /// can cause dramatic shifts in the cost of inlining a function. DenseMap<Value *, Constant *> SimplifiedValues; - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. + /// Keep track of the values which map back (through function arguments) to + /// allocas on the caller stack which could be simplified through SROA. DenseMap<Value *, Value *> SROAArgValues; - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. 
+ /// The mapping of caller Alloca values to their accumulated cost savings. If + /// we have to disable SROA for one of the allocas, this tells us how much + /// cost must be added. DenseMap<Value *, int> SROAArgCosts; - // Keep track of values which map to a pointer base and constant offset. + /// Keep track of values which map to a pointer base and constant offset. DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; // Custom simplification helper routines. @@ -203,20 +200,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - ProfileSummaryInfo *PSI, Function &Callee, int Threshold, - CallSite CSArg) - : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg), - Threshold(Threshold), Cost(0), IsCallerRecursive(false), - IsRecursiveCall(false), ExposesReturnsTwice(false), - HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + CallAnalyzer(const TargetTransformInfo &TTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, + const InlineParams &Params) + : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee), + CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), + Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -320,7 +318,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { continue; // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { unsigned ElementIdx = OpC->getZExtValue(); const StructLayout *SL = DL.getStructLayout(STy); Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); @@ -620,42 +618,44 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { } Function *Caller = CS.getCaller(); - if (DefaultInlineThreshold.getNumOccurrences() > 0) { - // Explicitly specified -inline-threhold overrides the threshold passed to - // CallAnalyzer's constructor. - Threshold = DefaultInlineThreshold; - } else { - // If -inline-threshold is not given, listen to the optsize and minsize - // attributes when they would decrease the threshold. 
- if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold) - Threshold = OptMinSizeThreshold; - else if (Caller->optForSize() && OptSizeThreshold < Threshold) - Threshold = OptSizeThreshold; - } - bool HotCallsite = false; - uint64_t TotalWeight; - if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && - PSI->isHotCount(TotalWeight)) - HotCallsite = true; - - // Listen to the inlinehint attribute or profile based hotness information - // when it would increase the threshold and the caller does not need to - // minimize its size. - bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) || - PSI->isHotFunction(&Callee) || - HotCallsite; - if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize()) - Threshold = HintThreshold; - - bool ColdCallee = PSI->isColdFunction(&Callee); - // Command line argument for DefaultInlineThreshold will override the default - // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, - // do not use the default cold threshold even if it is smaller. - if ((DefaultInlineThreshold.getNumOccurrences() == 0 || - ColdThreshold.getNumOccurrences() > 0) && - ColdCallee && ColdThreshold < Threshold) - Threshold = ColdThreshold; + // return min(A, B) if B is valid. + auto MinIfValid = [](int A, Optional<int> B) { + return B ? std::min(A, B.getValue()) : A; + }; + + // return max(A, B) if B is valid. + auto MaxIfValid = [](int A, Optional<int> B) { + return B ? std::max(A, B.getValue()) : A; + }; + + // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available + // and reduce the threshold if the caller has the necessary attribute. + if (Caller->optForMinSize()) + Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); + else if (Caller->optForSize()) + Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); + + // Adjust the threshold based on inlinehint attribute and profile based + // hotness information if the caller does not have MinSize attribute. + if (!Caller->optForMinSize()) { + if (Callee.hasFnAttribute(Attribute::InlineHint)) + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + if (PSI) { + uint64_t TotalWeight; + if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && + PSI->isHotCount(TotalWeight)) { + Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); + } else if (PSI->isFunctionEntryHot(&Callee)) { + // If callsite hotness can not be determined, we may still know + // that the callee is hot and treat it as a weaker hint for threshold + // increase. + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isFunctionEntryCold(&Callee)) { + Threshold = MinIfValid(Threshold, Params.ColdThreshold); + } + } + } // Finally, take the target-specific inlining threshold multiplier into // account. @@ -957,8 +957,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold, - CS); + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; + CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. 
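// Hedged usage sketch (annotation, not part of this patch): with the
// InlineParams-based API in this file, a caller such as a legacy-PM inliner
// would be expected to wire things up roughly as below. ACT, PSI, CalleeTTI,
// OptLevel and SizeOptLevel are assumed to exist in the surrounding code.
  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  InlineParams Params = getInlineParams(OptLevel, SizeOptLevel);
  InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
  bool ShouldInline = static_cast<bool>(IC); // cost is low enough per Params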
@@ -1251,13 +1252,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Cost -= InlineConstants::InstrCost; } } - + // The call instruction also disappears after inlining. + Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty; + // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); if (OnlyOneCallAndLocalLinkage) - Cost += InlineConstants::LastCallToStaticBonus; + Cost -= InlineConstants::LastCallToStaticBonus; // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1312,8 +1315,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), - EphValues); + CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this @@ -1444,32 +1446,19 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, AttributeFuncs::areInlineCompatible(*Caller, *Callee); } -InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold, - TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - ProfileSummaryInfo *PSI) { - return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI, - ACT, PSI); -} - -int llvm::computeThresholdFromOptLevels(unsigned OptLevel, - unsigned SizeOptLevel) { - if (OptLevel > 2) - return OptAggressiveThreshold; - if (SizeOptLevel == 1) // -Os - return OptSizeThreshold; - if (SizeOptLevel == 2) // -Oz - return OptMinSizeThreshold; - return DefaultInlineThreshold; +InlineCost llvm::getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI) { + return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, + GetAssumptionCache, PSI); } -int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; } - -InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, - int DefaultThreshold, - TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - ProfileSummaryInfo *PSI) { +InlineCost llvm::getInlineCost( + CallSite CS, Function *Callee, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI) { // Cannot inline indirect calls. if (!Callee) @@ -1494,7 +1483,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, // Don't inline functions which can be interposed at link-time. Don't inline // functions marked noinline or call sites marked noinline. - // Note: inlining non-exact non-interposable fucntions is fine, since we know + // Note: inlining non-exact non-interposable functions is fine, since we know // we have *a* correct implementation of the source level function. 
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) @@ -1503,7 +1492,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS); + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1551,3 +1540,67 @@ bool llvm::isInlineViable(Function &F) { return true; } + +// APIs to create InlineParams based on command line flags and/or other +// parameters. + +InlineParams llvm::getInlineParams(int Threshold) { + InlineParams Params; + + // This field is the threshold to use for a callee by default. This is + // derived from one or more of: + // * optimization or size-optimization levels, + // * a value passed to createFunctionInliningPass function, or + // * the -inline-threshold flag. + // If the -inline-threshold flag is explicitly specified, that is used + // irrespective of anything else. + if (InlineThreshold.getNumOccurrences() > 0) + Params.DefaultThreshold = InlineThreshold; + else + Params.DefaultThreshold = Threshold; + + // Set the HintThreshold knob from the -inlinehint-threshold. + Params.HintThreshold = HintThreshold; + + // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. + Params.HotCallSiteThreshold = HotCallSiteThreshold; + + // Set the OptMinSizeThreshold and OptSizeThreshold params only if the + // Set the OptMinSizeThreshold and OptSizeThreshold params only if the + // -inlinehint-threshold commandline option is not explicitly given. If that + // option is present, then its value applies even for callees with size and + // minsize attributes. + // If the -inline-threshold is not specified, set the ColdThreshold from the + // -inlinecold-threshold even if it is not explicitly passed. If + // -inline-threshold is specified, then -inlinecold-threshold needs to be + // explicitly specified to set the ColdThreshold knob + if (InlineThreshold.getNumOccurrences() == 0) { + Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; + Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; + Params.ColdThreshold = ColdThreshold; + } else if (ColdThreshold.getNumOccurrences() > 0) { + Params.ColdThreshold = ColdThreshold; + } + return Params; +} + +InlineParams llvm::getInlineParams() { + return getInlineParams(InlineThreshold); +} + +// Compute the default threshold for inlining based on the opt level and the +// size opt level. 
+static int computeThresholdFromOptLevels(unsigned OptLevel, + unsigned SizeOptLevel) { + if (OptLevel > 2) + return InlineConstants::OptAggressiveThreshold; + if (SizeOptLevel == 1) // -Os + return InlineConstants::OptSizeThreshold; + if (SizeOptLevel == 2) // -Oz + return InlineConstants::OptMinSizeThreshold; + return InlineThreshold; +} + +InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { + return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); +} diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index aeaf938..796e6e4 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -67,9 +67,12 @@ static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, const Query &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, unsigned); +static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse); static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); +static Value *SimplifyCastInst(unsigned, Value *, Type *, + const Query &, unsigned); /// For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. @@ -679,9 +682,26 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); - // 0 - X -> 0 if the sub is NUW. - if (isNUW && match(Op0, m_Zero())) - return Op0; + // Is this a negation? + if (match(Op0, m_Zero())) { + // 0 - X -> 0 if the sub is NUW. + if (isNUW) + return Op0; + + unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (KnownZero == ~APInt::getSignBit(BitWidth)) { + // Op1 is either 0 or the minimum signed value. If the sub is NSW, then + // Op1 must be 0 because negating the minimum signed value is undefined. + if (isNSW) + return Op0; + + // 0 - X -> X if X is 0 or the minimum signed value. + return Op1; + } + } // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X @@ -747,7 +767,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // See if "V === X - Y" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "trunc V" simplifies. - if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1)) + if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(), + Q, MaxRecurse - 1)) // It does, return the simplified "trunc V". 
return W; @@ -1085,6 +1106,16 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; + // udiv %V, C -> 0 if %V < C + if (MaxRecurse) { + if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst( + ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { + if (C->isAllOnesValue()) { + return Constant::getNullValue(Op0->getType()); + } + } + } + return nullptr; } @@ -1106,6 +1137,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (match(Op1, m_Undef())) return Op1; + // X / 1.0 -> X + if (match(Op1, m_FPOne())) + return Op0; + // 0 / X -> 0 // Requires that NaNs are off (X could be zero) and signed zeroes are // ignored (X could be positive or negative, so the output sign is unknown). @@ -1222,6 +1257,16 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; + // urem %V, C -> %V if %V < C + if (MaxRecurse) { + if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst( + ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { + if (C->isAllOnesValue()) { + return Op0; + } + } + } + return nullptr; } @@ -1497,17 +1542,45 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; } -static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - Type *ITy = Op0->getType(); +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. +static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; - ConstantInt *CI1, *CI2; - Value *V; + Value *A ,*B; + if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || + !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) + return nullptr; + // We have (icmp Pred0, A, B) & (icmp Pred1, A, B). + // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we + // can eliminate Op1 from this 'and'. + if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)) + return Op0; + + // Check for any combination of predicates that are guaranteed to be disjoint. + if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) || + (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) || + (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) || + (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)) + return getFalse(Op0->getType()); + + return nullptr; +} + +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. +static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) return X; + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) + return X; + // Look for this pattern: (icmp V, C0) & (icmp V, C1)). + Type *ITy = Op0->getType(); + ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; + Value *V; if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) && match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) { // Make a constant range that's the intersection of the two icmp ranges. 
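// The two folds added above -- udiv %V, C -> 0 and urem %V, C -> %V whenever
// %V u< C can be proven -- are plain unsigned-arithmetic identities. A small
// standalone exhaustive check over an 8-bit slice (not LLVM code):
constexpr bool checkUDivURemFolds() {
  for (unsigned C = 1; C < 64; ++C)
    for (unsigned V = 0; V < C; ++V)   // i.e. V u< C
      if (V / C != 0 || V % C != V)
        return false;
  return true;
}
static_assert(checkUDivURemFolds(),
              "V u< C implies V udiv C == 0 and V urem C == V");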
@@ -1518,21 +1591,22 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return getFalse(ITy); } - if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), - m_ConstantInt(CI2)))) + // (icmp (add V, C0), C1) & (icmp V, C0) + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) return nullptr; - if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1)))) + if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))) return nullptr; auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0)); + if (AddInst->getOperand(1) != Op1->getOperand(1)) + return nullptr; + bool isNSW = AddInst->hasNoSignedWrap(); bool isNUW = AddInst->hasNoUnsignedWrap(); - const APInt &CI1V = CI1->getValue(); - const APInt &CI2V = CI2->getValue(); - const APInt Delta = CI2V - CI1V; - if (CI1V.isStrictlyPositive()) { + const APInt Delta = *C1 - *C0; + if (C0->isStrictlyPositive()) { if (Delta == 2) { if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT) return getFalse(ITy); @@ -1546,7 +1620,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return getFalse(ITy); } } - if (CI1V.getBoolValue() && isNUW) { + if (C0->getBoolValue() && isNUW) { if (Delta == 2) if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT) return getFalse(ITy); @@ -1680,33 +1754,61 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union -/// contains all possible values. -static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. +static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; - ConstantInt *CI1, *CI2; - Value *V; + Value *A ,*B; + if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || + !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) + return nullptr; + + // We have (icmp Pred0, A, B) | (icmp Pred1, A, B). + // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we + // can eliminate Op0 from this 'or'. + if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)) + return Op1; + + // Check for any combination of predicates that cover the entire range of + // possibilities. + if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) || + (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) || + (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) || + (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE)) + return getTrue(Op0->getType()); + + return nullptr; +} +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. 
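// The ...WithSameOperands helpers above rely on two facts about a pair of
// predicates applied to the same A and B: if one predicate implies the other,
// one compare is redundant, and if the predicates are disjoint (or jointly
// exhaustive) the whole 'and' (or 'or') is a constant. A standalone spot check
// of the disjoint/exhaustive cases listed above, on plain signed ints:
constexpr bool checkSameOperandCmpFolds() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B) {
      if ((A < B) && (A >= B)) return false;     // inverse predicates: 'and' -> false
      if (!((A < B) || (A >= B))) return false;  // inverse predicates: 'or' -> true
      if ((A == B) && (A < B)) return false;     // eq & false-when-equal -> false
      if ((A < B) && (A > B)) return false;      // slt & sgt -> false
      if (!((A <= B) || (A >= B))) return false; // sle | sge -> true
    }
  return true;
}
static_assert(checkSameOperandCmpFolds(),
              "disjoint predicates and-fold to false; exhaustive ones or-fold to true");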
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) return X; - if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), - m_ConstantInt(CI2)))) - return nullptr; + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + return X; - if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1)))) + // (icmp (add V, C0), C1) | (icmp V, C0) + ICmpInst::Predicate Pred0, Pred1; + const APInt *C0, *C1; + Value *V; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) return nullptr; - Type *ITy = Op0->getType(); + if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))) + return nullptr; auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0)); + if (AddInst->getOperand(1) != Op1->getOperand(1)) + return nullptr; + + Type *ITy = Op0->getType(); bool isNSW = AddInst->hasNoSignedWrap(); bool isNUW = AddInst->hasNoUnsignedWrap(); - const APInt &CI1V = CI1->getValue(); - const APInt &CI2V = CI2->getValue(); - const APInt Delta = CI2V - CI1V; - if (CI1V.isStrictlyPositive()) { + const APInt Delta = *C1 - *C0; + if (C0->isStrictlyPositive()) { if (Delta == 2) { if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE) return getTrue(ITy); @@ -1720,7 +1822,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return getTrue(ITy); } } - if (CI1V.getBoolValue() && isNUW) { + if (C0->getBoolValue() && isNUW) { if (Delta == 2) if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE) return getTrue(ITy); @@ -2102,8 +2204,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, GetUnderlyingObjects(RHS, RHSUObjs, DL); // Is the set of underlying objects all noalias calls? - auto IsNAC = [](SmallVectorImpl<Value *> &Objects) { - return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall); + auto IsNAC = [](ArrayRef<Value *> Objects) { + return all_of(Objects, isNoAliasCall); }; // Is the set of underlying objects all things which must be disjoint from @@ -2112,8 +2214,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, // live with the compared-to allocation). For globals, we exclude symbols // that might be resolve lazily to symbols in another dynamically-loaded // library (and, thus, could be malloc'ed by the implementation). - auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) { - return std::all_of(Objects.begin(), Objects.end(), [](Value *V) { + auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) { + return all_of(Objects, [](Value *V) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) @@ -2150,470 +2252,275 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, return nullptr; } -/// Given operands for an ICmpInst, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { - CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; - assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); - - if (Constant *CLHS = dyn_cast<Constant>(LHS)) { - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); - - // If we have a constant, make sure it is on the RHS. 
- std::swap(LHS, RHS); - Pred = CmpInst::getSwappedPredicate(Pred); - } - +/// Fold an icmp when its operands have i1 scalar type. +static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const Query &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. + if (!OpTy->getScalarType()->isIntegerTy(1)) + return nullptr; - // icmp X, X -> true/false - // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false - // because X could be 0. - if (LHS == RHS || isa<UndefValue>(RHS)) - return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - // Special case logic when the operands have i1 type. - if (OpTy->getScalarType()->isIntegerTy(1)) { - switch (Pred) { - default: break; - case ICmpInst::ICMP_EQ: - // X == 1 -> X - if (match(RHS, m_One())) - return LHS; - break; - case ICmpInst::ICMP_NE: - // X != 0 -> X - if (match(RHS, m_Zero())) - return LHS; - break; - case ICmpInst::ICMP_UGT: - // X >u 0 -> X - if (match(RHS, m_Zero())) - return LHS; - break; - case ICmpInst::ICMP_UGE: { - // X >=u 1 -> X - if (match(RHS, m_One())) - return LHS; - if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) - return getTrue(ITy); - break; - } - case ICmpInst::ICMP_SGE: { - /// For signed comparison, the values for an i1 are 0 and -1 - /// respectively. This maps into a truth table of: - /// LHS | RHS | LHS >=s RHS | LHS implies RHS - /// 0 | 0 | 1 (0 >= 0) | 1 - /// 0 | 1 | 1 (0 >= -1) | 1 - /// 1 | 0 | 0 (-1 >= 0) | 0 - /// 1 | 1 | 1 (-1 >= -1) | 1 - if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) - return getTrue(ITy); - break; - } - case ICmpInst::ICMP_SLT: - // X <s 0 -> X - if (match(RHS, m_Zero())) - return LHS; - break; - case ICmpInst::ICMP_SLE: - // X <=s -1 -> X - if (match(RHS, m_One())) - return LHS; - break; - case ICmpInst::ICMP_ULE: { - if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) - return getTrue(ITy); - break; - } - } - } - - // If we are comparing with zero then try hard since this is a common case. 
- if (match(RHS, m_Zero())) { - bool LHSKnownNonNegative, LHSKnownNegative; - switch (Pred) { - default: llvm_unreachable("Unknown ICmp predicate!"); - case ICmpInst::ICMP_ULT: - return getFalse(ITy); - case ICmpInst::ICMP_UGE: + switch (Pred) { + default: + break; + case ICmpInst::ICMP_EQ: + // X == 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_NE: + // X != 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGT: + // X >u 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGE: + // X >=u 1 -> X + if (match(RHS, m_One())) + return LHS; + if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) return getTrue(ITy); - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return getFalse(ITy); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return getTrue(ITy); - break; - case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) - return getTrue(ITy); - if (LHSKnownNonNegative) - return getFalse(ITy); - break; - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) - return getTrue(ITy); - if (LHSKnownNonNegative && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return getFalse(ITy); - break; - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) - return getFalse(ITy); - if (LHSKnownNonNegative) - return getTrue(ITy); - break; - case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) - return getFalse(ITy); - if (LHSKnownNonNegative && - isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return getTrue(ITy); - break; - } + break; + case ICmpInst::ICMP_SGE: + /// For signed comparison, the values for an i1 are 0 and -1 + /// respectively. This maps into a truth table of: + /// LHS | RHS | LHS >=s RHS | LHS implies RHS + /// 0 | 0 | 1 (0 >= 0) | 1 + /// 0 | 1 | 1 (0 >= -1) | 1 + /// 1 | 0 | 0 (-1 >= 0) | 0 + /// 1 | 1 | 1 (-1 >= -1) | 1 + if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SLT: + // X <s 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_SLE: + // X <=s -1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_ULE: + if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) + return getTrue(ITy); + break; } - // See if we are doing a comparison with a constant integer. - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - // Rule out tautological comparisons (eg., ult 0 or uge 0). - ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue()); - if (RHS_CR.isEmptySet()) - return ConstantInt::getFalse(CI->getContext()); - if (RHS_CR.isFullSet()) - return ConstantInt::getTrue(CI->getContext()); - - // Many binary operators with constant RHS have easy to compute constant - // range. Use them to check whether the comparison is a tautology. - unsigned Width = CI->getBitWidth(); - APInt Lower = APInt(Width, 0); - APInt Upper = APInt(Width, 0); - ConstantInt *CI2; - if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) { - // 'urem x, CI2' produces [0, CI2). 
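// simplifyICmpOfBools above exploits that an i1 holds only 0 and 1 (or 0 and
// -1 when read as signed), so several compares collapse to the boolean itself.
// The unsigned identities it uses, checked on a C++ bool (values {0,1}) as a
// standalone illustration:
constexpr bool checkBoolICmpFolds() {
  for (int I = 0; I < 2; ++I) {
    bool B = (I != 0);
    if ((B != false) != B) return false;  // X != 0  -> X
    if ((B == true) != B) return false;   // X == 1  -> X
    if ((B > false) != B) return false;   // X u> 0  -> X
    if ((B >= true) != B) return false;   // X u>= 1 -> X
  }
  return true;
}
static_assert(checkBoolICmpFolds(), "i1 compares against 0/1 fold to the value itself");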
- Upper = CI2->getValue(); - } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) { - // 'srem x, CI2' produces (-|CI2|, |CI2|). - Upper = CI2->getValue().abs(); - Lower = (-Upper) + 1; - } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { - // 'udiv CI2, x' produces [0, CI2]. - Upper = CI2->getValue() + 1; - } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { - // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. - APInt NegOne = APInt::getAllOnesValue(Width); - if (!CI2->isZero()) - Upper = NegOne.udiv(CI2->getValue()) + 1; - } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) { - if (CI2->isMinSignedValue()) { - // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. - Lower = CI2->getValue(); - Upper = Lower.lshr(1) + 1; - } else { - // 'sdiv CI2, x' produces [-|CI2|, |CI2|]. - Upper = CI2->getValue().abs() + 1; - Lower = (-Upper) + 1; - } - } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { - APInt IntMin = APInt::getSignedMinValue(Width); - APInt IntMax = APInt::getSignedMaxValue(Width); - const APInt &Val = CI2->getValue(); - if (Val.isAllOnesValue()) { - // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] - // where CI2 != -1 and CI2 != 0 and CI2 != 1 - Lower = IntMin + 1; - Upper = IntMax + 1; - } else if (Val.countLeadingZeros() < Width - 1) { - // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2] - // where CI2 != -1 and CI2 != 0 and CI2 != 1 - Lower = IntMin.sdiv(Val); - Upper = IntMax.sdiv(Val); - if (Lower.sgt(Upper)) - std::swap(Lower, Upper); - Upper = Upper + 1; - assert(Upper != Lower && "Upper part of range has wrapped!"); - } - } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) { - // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)] - Lower = CI2->getValue(); - Upper = Lower.shl(Lower.countLeadingZeros()) + 1; - } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) { - if (CI2->isNegative()) { - // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2] - unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1; - Lower = CI2->getValue().shl(ShiftAmount); - Upper = CI2->getValue() + 1; - } else { - // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1] - unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1; - Lower = CI2->getValue(); - Upper = CI2->getValue().shl(ShiftAmount) + 1; - } - } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { - // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. - APInt NegOne = APInt::getAllOnesValue(Width); - if (CI2->getValue().ult(Width)) - Upper = NegOne.lshr(CI2->getValue()) + 1; - } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) { - // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2]. - unsigned ShiftAmount = Width - 1; - if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) - ShiftAmount = CI2->getValue().countTrailingZeros(); - Lower = CI2->getValue().lshr(ShiftAmount); - Upper = CI2->getValue() + 1; - } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { - // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. 
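// The deleted block here (and its APInt-based replacement later in this patch)
// derives a constant range for the LHS from its defining operation, e.g.
// 'urem x, C2' always lies in [0, C2) and 'udiv x, C2' in [0, UINT_MAX / C2];
// a compare whose constant RHS sits outside that range is a tautology. A
// standalone exhaustive check of those two ranges over an 8-bit slice:
constexpr bool checkDerivedRanges() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 1; C < 32; ++C) {
      if (X % C >= C) return false;        // urem x, C is in [0, C)
      if (X / C > 255u / C) return false;  // udiv x, C is in [0, 255 / C] for i8 x
    }
  return true;
}
static_assert(checkDerivedRanges(), "ranges used by the tautology check hold");
// So, e.g., icmp ult (urem %x, 8), 8 folds to true for any i8 %x.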
- APInt IntMin = APInt::getSignedMinValue(Width); - APInt IntMax = APInt::getSignedMaxValue(Width); - if (CI2->getValue().ult(Width)) { - Lower = IntMin.ashr(CI2->getValue()); - Upper = IntMax.ashr(CI2->getValue()) + 1; - } - } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) { - unsigned ShiftAmount = Width - 1; - if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) - ShiftAmount = CI2->getValue().countTrailingZeros(); - if (CI2->isNegative()) { - // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)] - Lower = CI2->getValue(); - Upper = CI2->getValue().ashr(ShiftAmount) + 1; - } else { - // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2] - Lower = CI2->getValue().ashr(ShiftAmount); - Upper = CI2->getValue() + 1; - } - } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { - // 'or x, CI2' produces [CI2, UINT_MAX]. - Lower = CI2->getValue(); - } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { - // 'and x, CI2' produces [0, CI2]. - Upper = CI2->getValue() + 1; - } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) { - // 'add nuw x, CI2' produces [CI2, UINT_MAX]. - Lower = CI2->getValue(); - } - - ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) - : ConstantRange(Width, true); + return nullptr; +} - if (auto *I = dyn_cast<Instruction>(LHS)) - if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) - LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); +/// Try hard to fold icmp with zero RHS because this is a common case. +static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const Query &Q) { + if (!match(RHS, m_Zero())) + return nullptr; - if (!LHS_CR.isFullSet()) { - if (RHS_CR.contains(LHS_CR)) - return ConstantInt::getTrue(RHS->getContext()); - if (RHS_CR.inverse().contains(LHS_CR)) - return ConstantInt::getFalse(RHS->getContext()); - } + Type *ITy = GetCompareTy(LHS); // The return type. + bool LHSKnownNonNegative, LHSKnownNegative; + switch (Pred) { + default: + llvm_unreachable("Unknown ICmp predicate!"); + case ICmpInst::ICMP_ULT: + return getFalse(ITy); + case ICmpInst::ICMP_UGE: + return getTrue(ITy); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULE: + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return getFalse(ITy); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SLT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); + if (LHSKnownNegative) + return getTrue(ITy); + if (LHSKnownNonNegative) + return getFalse(ITy); + break; + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); + if (LHSKnownNegative) + return getTrue(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return getFalse(ITy); + break; + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); + if (LHSKnownNegative) + return getFalse(ITy); + if (LHSKnownNonNegative) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SGT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, + Q.CxtI, Q.DT); + if (LHSKnownNegative) + return getFalse(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return getTrue(ITy); + break; } - // If both operands have range metadata, use the metadata - // to simplify the comparison. 
- if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) { - auto RHS_Instr = dyn_cast<Instruction>(RHS); - auto LHS_Instr = dyn_cast<Instruction>(LHS); - - if (RHS_Instr->getMetadata(LLVMContext::MD_range) && - LHS_Instr->getMetadata(LLVMContext::MD_range)) { - auto RHS_CR = getConstantRangeFromMetadata( - *RHS_Instr->getMetadata(LLVMContext::MD_range)); - auto LHS_CR = getConstantRangeFromMetadata( - *LHS_Instr->getMetadata(LLVMContext::MD_range)); + return nullptr; +} - auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); - if (Satisfied_CR.contains(LHS_CR)) - return ConstantInt::getTrue(RHS->getContext()); +static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + const APInt *C; + if (!match(RHS, m_APInt(C))) + return nullptr; - auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( - CmpInst::getInversePredicate(Pred), RHS_CR); - if (InversedSatisfied_CR.contains(LHS_CR)) - return ConstantInt::getFalse(RHS->getContext()); + // Rule out tautological comparisons (eg., ult 0 or uge 0). + ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C); + if (RHS_CR.isEmptySet()) + return ConstantInt::getFalse(GetCompareTy(RHS)); + if (RHS_CR.isFullSet()) + return ConstantInt::getTrue(GetCompareTy(RHS)); + + // Many binary operators with constant RHS have easy to compute constant + // range. Use them to check whether the comparison is a tautology. + unsigned Width = C->getBitWidth(); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); + const APInt *C2; + if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) { + // 'urem x, C2' produces [0, C2). + Upper = *C2; + } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) { + // 'srem x, C2' produces (-|C2|, |C2|). + Upper = C2->abs(); + Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) { + // 'udiv C2, x' produces [0, C2]. + Upper = *C2 + 1; + } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) { + // 'udiv x, C2' produces [0, UINT_MAX / C2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (*C2 != 0) + Upper = NegOne.udiv(*C2) + 1; + } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) { + if (C2->isMinSignedValue()) { + // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. + Lower = *C2; + Upper = Lower.lshr(1) + 1; + } else { + // 'sdiv C2, x' produces [-|C2|, |C2|]. + Upper = C2->abs() + 1; + Lower = (-Upper) + 1; } - } - - // Compare of cast, for example (zext X) != 0 -> X != 0 - if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { - Instruction *LI = cast<CastInst>(LHS); - Value *SrcOp = LI->getOperand(0); - Type *SrcTy = SrcOp->getType(); - Type *DstTy = LI->getType(); - - // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input - // if the integer type is the same size as the pointer type. - if (MaxRecurse && isa<PtrToIntInst>(LI) && - Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - // Transfer the cast to the constant. - if (Value *V = SimplifyICmpInst(Pred, SrcOp, - ConstantExpr::getIntToPtr(RHSC, SrcTy), - Q, MaxRecurse-1)) - return V; - } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { - if (RI->getOperand(0)->getType() == SrcTy) - // Compare without the cast. 
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - Q, MaxRecurse-1)) - return V; - } + } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) { + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (C2->isAllOnesValue()) { + // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] + // where C2 != -1 and C2 != 0 and C2 != 1 + Lower = IntMin + 1; + Upper = IntMax + 1; + } else if (C2->countLeadingZeros() < Width - 1) { + // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2] + // where C2 != -1 and C2 != 0 and C2 != 1 + Lower = IntMin.sdiv(*C2); + Upper = IntMax.sdiv(*C2); + if (Lower.sgt(Upper)) + std::swap(Lower, Upper); + Upper = Upper + 1; + assert(Upper != Lower && "Upper part of range has wrapped!"); } - - if (isa<ZExtInst>(LHS)) { - // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the - // same type. - if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { - if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) - // Compare X and Y. Note that signed predicates become unsigned. - if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), Q, - MaxRecurse-1)) - return V; - } - // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended - // too. If not, then try to deduce the result of the comparison. - else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - // Compute the constant that would happen if we truncated to SrcTy then - // reextended to DstTy. - Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); - Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); - - // If the re-extended constant didn't change then this is effectively - // also a case of comparing two zero-extended values. - if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, Q, MaxRecurse-1)) - return V; - - // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit - // there. Use this to work out the result of the comparison. - if (RExt != CI) { - switch (Pred) { - default: llvm_unreachable("Unknown ICmp predicate!"); - // LHS <u RHS. - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - return ConstantInt::getFalse(CI->getContext()); - - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - return ConstantInt::getTrue(CI->getContext()); - - // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS - // is non-negative then LHS <s RHS. - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - return CI->getValue().isNegative() ? - ConstantInt::getTrue(CI->getContext()) : - ConstantInt::getFalse(CI->getContext()); - - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - return CI->getValue().isNegative() ? 
- ConstantInt::getFalse(CI->getContext()) : - ConstantInt::getTrue(CI->getContext()); - } - } - } + } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) { + // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)] + Lower = *C2; + Upper = Lower.shl(Lower.countLeadingZeros()) + 1; + } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) { + if (C2->isNegative()) { + // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2] + unsigned ShiftAmount = C2->countLeadingOnes() - 1; + Lower = C2->shl(ShiftAmount); + Upper = *C2 + 1; + } else { + // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1] + unsigned ShiftAmount = C2->countLeadingZeros() - 1; + Lower = *C2; + Upper = C2->shl(ShiftAmount) + 1; } + } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) { + // 'lshr x, C2' produces [0, UINT_MAX >> C2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (C2->ult(Width)) + Upper = NegOne.lshr(*C2) + 1; + } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) { + // 'lshr C2, x' produces [C2 >> (Width-1), C2]. + unsigned ShiftAmount = Width - 1; + if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = C2->countTrailingZeros(); + Lower = C2->lshr(ShiftAmount); + Upper = *C2 + 1; + } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) { + // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (C2->ult(Width)) { + Lower = IntMin.ashr(*C2); + Upper = IntMax.ashr(*C2) + 1; + } + } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) { + unsigned ShiftAmount = Width - 1; + if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = C2->countTrailingZeros(); + if (C2->isNegative()) { + // 'ashr C2, x' produces [C2, C2 >> (Width-1)] + Lower = *C2; + Upper = C2->ashr(ShiftAmount) + 1; + } else { + // 'ashr C2, x' produces [C2 >> (Width-1), C2] + Lower = C2->ashr(ShiftAmount); + Upper = *C2 + 1; + } + } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) { + // 'or x, C2' produces [C2, UINT_MAX]. + Lower = *C2; + } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) { + // 'and x, C2' produces [0, C2]. + Upper = *C2 + 1; + } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) { + // 'add nuw x, C2' produces [C2, UINT_MAX]. + Lower = *C2; + } - if (isa<SExtInst>(LHS)) { - // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the - // same type. - if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { - if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) - // Compare X and Y. Note that the predicate does not change. - if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - Q, MaxRecurse-1)) - return V; - } - // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended - // too. If not, then try to deduce the result of the comparison. - else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - // Compute the constant that would happen if we truncated to SrcTy then - // reextended to DstTy. - Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); - Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); - - // If the re-extended constant didn't change then this is effectively - // also a case of comparing two sign-extended values. - if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) - return V; - - // Otherwise the upper bits of LHS are all equal, while RHS has varying - // bits there. Use this to work out the result of the comparison. 
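// The APInt-based rewrite above derives the same style of ranges for bitwise
// and no-wrap ops: 'and x, C2' can only produce values in [0, C2], 'or x, C2'
// only values in [C2, UINT_MAX], and 'add nuw x, C2' only values in
// [C2, UINT_MAX]. A standalone 8-bit check of those three claims:
constexpr bool checkBitwiseAndAddNuwRanges() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; C += 5) {
      if ((X & C) > C) return false;    // and x, C in [0, C]
      if ((X | C) < C) return false;    // or x, C in [C, UINT_MAX]
      if (X + C <= 255 && X + C < C)    // no unsigned wrap => sum >= C
        return false;
    }
  return true;
}
static_assert(checkBitwiseAndAddNuwRanges(), "ranges for and/or/add-nuw hold");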
- if (RExt != CI) { - switch (Pred) { - default: llvm_unreachable("Unknown ICmp predicate!"); - case ICmpInst::ICMP_EQ: - return ConstantInt::getFalse(CI->getContext()); - case ICmpInst::ICMP_NE: - return ConstantInt::getTrue(CI->getContext()); + ConstantRange LHS_CR = + Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true); - // If RHS is non-negative then LHS <s RHS. If RHS is negative then - // LHS >s RHS. - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - return CI->getValue().isNegative() ? - ConstantInt::getTrue(CI->getContext()) : - ConstantInt::getFalse(CI->getContext()); - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - return CI->getValue().isNegative() ? - ConstantInt::getFalse(CI->getContext()) : - ConstantInt::getTrue(CI->getContext()); + if (auto *I = dyn_cast<Instruction>(LHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); - // If LHS is non-negative then LHS <u RHS. If LHS is negative then - // LHS >u RHS. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - // Comparison is true iff the LHS <s 0. - if (MaxRecurse) - if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, - Constant::getNullValue(SrcTy), - Q, MaxRecurse-1)) - return V; - break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - // Comparison is true iff the LHS >=s 0. - if (MaxRecurse) - if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, - Constant::getNullValue(SrcTy), - Q, MaxRecurse-1)) - return V; - break; - } - } - } - } + if (!LHS_CR.isFullSet()) { + if (RHS_CR.contains(LHS_CR)) + return ConstantInt::getTrue(GetCompareTy(RHS)); + if (RHS_CR.inverse().contains(LHS_CR)) + return ConstantInt::getFalse(GetCompareTy(RHS)); } - // icmp eq|ne X, Y -> false|true if X != Y - if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && - isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { - LLVMContext &Ctx = LHS->getType()->getContext(); - return Pred == ICmpInst::ICMP_NE ? - ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); - } + return nullptr; +} + +static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const Query &Q, + unsigned MaxRecurse) { + Type *ITy = GetCompareTy(LHS); // The return type. - // Special logic for binary operators. BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); if (MaxRecurse && (LBO || RBO)) { @@ -2622,35 +2529,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). 
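// simplifyICmpWithBinOp, which begins here, decomposes both sides as A+B and
// C+D so that icmp (X+Y), X can be reduced to icmp Y, 0 for equalities or when
// the add cannot wrap in the signedness of the predicate. A standalone check
// of the unsigned no-wrap case on 8-bit values:
constexpr bool checkAddCompareFold() {
  for (unsigned X = 0; X < 128; ++X)
    for (unsigned Y = 0; X + Y < 256; ++Y) {    // only adds with no unsigned wrap
      unsigned Sum = X + Y;
      if (Sum < X) return false;                // icmp ult (X+Y), X -> icmp ult Y, 0 -> false
      if (!(Sum >= X)) return false;            // icmp uge (X+Y), X -> icmp uge Y, 0 -> true
      if ((Sum == X) != (Y == 0)) return false; // equalities need no wrap assumption at all
    }
  return true;
}
static_assert(checkAddCompareFold(),
              "icmp (X+Y), X folds to icmp Y, 0 when the add cannot wrap");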
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; if (LBO && LBO->getOpcode() == Instruction::Add) { - A = LBO->getOperand(0); B = LBO->getOperand(1); - NoLHSWrapProblem = ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); + A = LBO->getOperand(0); + B = LBO->getOperand(1); + NoLHSWrapProblem = + ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); } if (RBO && RBO->getOpcode() == Instruction::Add) { - C = RBO->getOperand(0); D = RBO->getOperand(1); - NoRHSWrapProblem = ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); + C = RBO->getOperand(0); + D = RBO->getOperand(1); + NoRHSWrapProblem = + ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); } // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, - Constant::getNullValue(RHS->getType()), - Q, MaxRecurse-1)) + Constant::getNullValue(RHS->getType()), Q, + MaxRecurse - 1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) - if (Value *V = SimplifyICmpInst(Pred, - Constant::getNullValue(LHS->getType()), - C == LHS ? D : C, Q, MaxRecurse-1)) + if (Value *V = + SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), + C == LHS ? D : C, Q, MaxRecurse - 1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. - if (A && C && (A == C || A == D || B == C || B == D) && - NoLHSWrapProblem && NoRHSWrapProblem) { + if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem && + NoRHSWrapProblem) { // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y, *Z; if (A == C) { @@ -2671,7 +2582,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Y = A; Z = C; } - if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1)) return V; } } @@ -2771,7 +2682,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.CxtI, Q.DT); if (!KnownNonNegative) break; - // fall-through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: @@ -2782,7 +2693,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.CxtI, Q.DT); if (!KnownNonNegative) break; - // fall-through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2802,7 +2713,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.CxtI, Q.DT); if (!KnownNonNegative) break; - // fall-through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: @@ -2813,7 +2724,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Q.CxtI, Q.DT); if (!KnownNonNegative) break; - // fall-through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2832,6 +2743,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); } + // x >=u x >> y + // x >=u x udiv y. 
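// The fold implemented immediately below these two comment lines uses the fact
// that an unsigned right shift or unsigned division never increases a value,
// so icmp ult X, (lshr X, Y) is always false and icmp uge X, (lshr X, Y) is
// always true (likewise for udiv). Standalone 8-bit check:
constexpr bool checkShiftDivNeverGrow() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 8; ++Y) {
      if (X < (X >> Y)) return false;         // icmp ult X, (lshr X, Y) -> false
      if (Y != 0 && X < X / Y) return false;  // icmp ult X, (udiv X, Y) -> false
    }
  return true;
}
static_assert(checkShiftDivNeverGrow(), "x u>= x >> y and x u>= x udiv y");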
+ if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) || + match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) { + // icmp pred X, (X op Y) + if (Pred == ICmpInst::ICMP_ULT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_UGE) + return getTrue(ITy); + } + // handle: // CI2 << X == CI // CI2 << X != CI @@ -2870,18 +2792,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { - default: break; + default: + break; case Instruction::UDiv: case Instruction::LShr: if (ICmpInst::isSigned(Pred)) break; - // fall-through + LLVM_FALLTHROUGH; case Instruction::SDiv: case Instruction::AShr: if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), Q, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; case Instruction::Shl: { @@ -2892,40 +2815,51 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), Q, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; } } } + return nullptr; +} - // Simplify comparisons involving max/min. +/// Simplify integer comparisons where at least one operand of the compare +/// matches an integer min/max idiom. +static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const Query &Q, + unsigned MaxRecurse) { + Type *ITy = GetCompareTy(LHS); // The return type. Value *A, *B; CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". // Signed variants on "max(a,b)>=a -> true". if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { - if (A != RHS) std::swap(A, B); // smax(A, B) pred A. + if (A != RHS) + std::swap(A, B); // smax(A, B) pred A. EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". // We analyze this as smax(A, B) pred A. P = Pred; } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { - if (A != LHS) std::swap(A, B); // A pred smax(A, B). + if (A != LHS) + std::swap(A, B); // A pred smax(A, B). EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". // We analyze this as smax(A, B) swapped-pred A. P = CmpInst::getSwappedPredicate(Pred); } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { - if (A != RHS) std::swap(A, B); // smin(A, B) pred A. + if (A != RHS) + std::swap(A, B); // smin(A, B) pred A. EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". // We analyze this as smax(-A, -B) swapped-pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. P = CmpInst::getSwappedPredicate(Pred); } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { - if (A != LHS) std::swap(A, B); // A pred smin(A, B). + if (A != LHS) + std::swap(A, B); // A pred smin(A, B). EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". // We analyze this as smax(-A, -B) pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. @@ -2946,7 +2880,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. 
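// The min/max handling here reduces every pattern to the single question
// "max(A,B) pred A" (or its swapped/inverted forms); the payoff is that e.g.
// smax(A,B) sge A is a tautology and smax(A,B) slt A is unsatisfiable, and
// dually for smin. A standalone spot check with std::max/std::min:
#include <algorithm>

constexpr bool checkMinMaxCompares() {
  for (int A = -4; A <= 4; ++A)
    for (int B = -4; B <= 4; ++B) {
      if (!(std::max(A, B) >= A)) return false;  // smax(A,B) sge A -> true
      if (std::max(A, B) < A) return false;      // smax(A,B) slt A -> false
      if (!(std::min(A, B) <= A)) return false;  // smin(A,B) sle A -> true
      if (std::min(A, B) > A) return false;      // smin(A,B) sgt A -> false
    }
  return true;
}
static_assert(checkMinMaxCompares(), "min/max vs. operand compares fold to constants");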
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: @@ -2960,7 +2894,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } @@ -2976,26 +2910,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Unsigned variants on "max(a,b)>=a -> true". P = CmpInst::BAD_ICMP_PREDICATE; if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { - if (A != RHS) std::swap(A, B); // umax(A, B) pred A. + if (A != RHS) + std::swap(A, B); // umax(A, B) pred A. EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". // We analyze this as umax(A, B) pred A. P = Pred; } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { - if (A != LHS) std::swap(A, B); // A pred umax(A, B). + if (A != LHS) + std::swap(A, B); // A pred umax(A, B). EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". // We analyze this as umax(A, B) swapped-pred A. P = CmpInst::getSwappedPredicate(Pred); } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { - if (A != RHS) std::swap(A, B); // umin(A, B) pred A. + if (A != RHS) + std::swap(A, B); // umin(A, B) pred A. EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". // We analyze this as umax(-A, -B) swapped-pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. P = CmpInst::getSwappedPredicate(Pred); } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { - if (A != LHS) std::swap(A, B); // A pred umin(A, B). + if (A != LHS) + std::swap(A, B); // A pred umin(A, B). EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". // We analyze this as umax(-A, -B) pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. @@ -3016,7 +2954,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: @@ -3030,7 +2968,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } @@ -3087,11 +3025,254 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); } + return nullptr; +} + +/// Given operands for an ICmpInst, see if we can fold the result. +/// If not, this returns null. +static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const Query &Q, unsigned MaxRecurse) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); + + // If we have a constant, make sure it is on the RHS. 
+ std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + Type *ITy = GetCompareTy(LHS); // The return type. + + // icmp X, X -> true/false + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q)) + return V; + + if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q)) + return V; + + if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS)) + return V; + + // If both operands have range metadata, use the metadata + // to simplify the comparison. + if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) { + auto RHS_Instr = dyn_cast<Instruction>(RHS); + auto LHS_Instr = dyn_cast<Instruction>(LHS); + + if (RHS_Instr->getMetadata(LLVMContext::MD_range) && + LHS_Instr->getMetadata(LLVMContext::MD_range)) { + auto RHS_CR = getConstantRangeFromMetadata( + *RHS_Instr->getMetadata(LLVMContext::MD_range)); + auto LHS_CR = getConstantRangeFromMetadata( + *LHS_Instr->getMetadata(LLVMContext::MD_range)); + + auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); + if (Satisfied_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + + auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( + CmpInst::getInversePredicate(Pred), RHS_CR); + if (InversedSatisfied_CR.contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + + // Compare of cast, for example (zext X) != 0 -> X != 0 + if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { + Instruction *LI = cast<CastInst>(LHS); + Value *SrcOp = LI->getOperand(0); + Type *SrcTy = SrcOp->getType(); + Type *DstTy = LI->getType(); + + // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input + // if the integer type is the same size as the pointer type. + if (MaxRecurse && isa<PtrToIntInst>(LI) && + Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // Transfer the cast to the constant. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, + ConstantExpr::getIntToPtr(RHSC, SrcTy), + Q, MaxRecurse-1)) + return V; + } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { + if (RI->getOperand(0)->getType() == SrcTy) + // Compare without the cast. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + Q, MaxRecurse-1)) + return V; + } + } + + if (isa<ZExtInst>(LHS)) { + // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the + // same type. + if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that signed predicates become unsigned. + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, RI->getOperand(0), Q, + MaxRecurse-1)) + return V; + } + // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two zero-extended values. 
+ if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, Trunc, Q, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit + // there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: llvm_unreachable("Unknown ICmp predicate!"); + // LHS <u RHS. + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getTrue(CI->getContext()); + + // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS + // is non-negative then LHS <s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + } + } + } + } + + if (isa<SExtInst>(LHS)) { + // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the + // same type. + if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that the predicate does not change. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + Q, MaxRecurse-1)) + return V; + } + // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two sign-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are all equal, while RHS has varying + // bits there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: llvm_unreachable("Unknown ICmp predicate!"); + case ICmpInst::ICMP_EQ: + return ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_NE: + return ConstantInt::getTrue(CI->getContext()); + + // If RHS is non-negative then LHS <s RHS. If RHS is negative then + // LHS >s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + + // If LHS is non-negative then LHS <u RHS. If LHS is negative then + // LHS >u RHS. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // Comparison is true iff the LHS <s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, + Constant::getNullValue(SrcTy), + Q, MaxRecurse-1)) + return V; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // Comparison is true iff the LHS >=s 0. 
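// The zext handling above hinges on a round-trip test: truncate the constant
// to the source type and re-extend it; if that changes the constant, the
// compare is decided by magnitude alone, because a zero-extended i8 can never
// reach a 32-bit constant that did not survive the round trip. A standalone
// check with an i8 zero-extended to 32 bits and the constant 0x1FF:
#include <cstdint>

constexpr bool checkZextCompare() {
  const uint32_t C = 0x1FF;      // trunc+zext through 8 bits gives 0xFF != 0x1FF
  for (uint32_t X = 0; X < 256; ++X) {
    if (!(X < C)) return false;  // icmp ult (zext i8 %x), 511 -> true
    if (X >= C) return false;    // icmp uge (zext i8 %x), 511 -> false
  }
  return true;
}
static_assert(checkZextCompare(), "zext'd value vs. non-round-tripping constant folds");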
+ if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, + Constant::getNullValue(SrcTy), + Q, MaxRecurse-1)) + return V; + break; + } + } + } + } + } + + // icmp eq|ne X, Y -> false|true if X != Y + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + LLVMContext &Ctx = LHS->getType()->getContext(); + return Pred == ICmpInst::ICMP_NE ? + ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + } + + if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + + if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse)) + return V; + // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. if (LHS->getType()->isPointerTy()) if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS)) return C; + if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS)) + if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS)) + if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) == + Q.DL.getTypeSizeInBits(CLHS->getType()) && + Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) == + Q.DL.getTypeSizeInBits(CRHS->getType())) + if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, + CLHS->getPointerOperand(), + CRHS->getPointerOperand())) + return C; if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) { @@ -3119,17 +3300,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If a bit is known to be zero for A and known to be one for B, // then A and B cannot be equal. if (ICmpInst::isEquality(Pred)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - uint32_t BitWidth = CI->getBitWidth(); + const APInt *RHSVal; + if (match(RHS, m_APInt(RHSVal))) { + unsigned BitWidth = RHSVal->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); - const APInt &RHSVal = CI->getValue(); - if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0)) - return Pred == ICmpInst::ICMP_EQ - ? ConstantInt::getFalse(CI->getContext()) - : ConstantInt::getTrue(CI->getContext()); + if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0)) + return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy) + : ConstantInt::getTrue(ITy); } } @@ -3175,17 +3355,18 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // Fold trivial predicates. + Type *RetTy = GetCompareTy(LHS); if (Pred == FCmpInst::FCMP_FALSE) - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); if (Pred == FCmpInst::FCMP_TRUE) - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); // UNO/ORD predicates can be trivially folded if NaNs are ignored. if (FMF.noNaNs()) { if (Pred == FCmpInst::FCMP_UNO) - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); if (Pred == FCmpInst::FCMP_ORD) - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); } // fcmp pred x, undef and fcmp pred undef, x @@ -3193,15 +3374,15 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { // Choosing NaN for the undef will always make unordered comparison succeed // and ordered comparison fail. 
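// The fcmp folds in this hunk follow IEEE-754 comparison semantics: every
// ordered comparison involving a NaN is false and every unordered one is true,
// which is why picking NaN for an undef operand decides the compare and why
// FCMP_UNO/FCMP_ORD become constants once NaNs are excluded. A standalone
// runtime check (NaN compares are awkward in constexpr):
#include <cassert>
#include <limits>

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  double X = 1.0;
  assert(!(NaN == X) && !(NaN < X) && !(NaN > X)); // ordered compares with NaN are false
  assert(NaN != X);                                // the unordered compare (une) is true
  assert(!(NaN == NaN));                           // fcmp oeq x, x needs nnan to fold to true
  return 0;
}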
- return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred)); + return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); } // fcmp x,x -> true/false. Not all compares are foldable. if (LHS == RHS) { if (CmpInst::isTrueWhenEqual(Pred)) - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); if (CmpInst::isFalseWhenEqual(Pred)) - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); } // Handle fcmp with constant RHS @@ -3216,11 +3397,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the constant is a nan, see if we can fold the comparison based on it. if (CFP->getValueAPF().isNaN()) { if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" - return ConstantInt::getFalse(CFP->getContext()); + return getFalse(RetTy); assert(FCmpInst::isUnordered(Pred) && "Comparison must be either ordered or unordered!"); // True if unordered. - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); } // Check whether the constant is an infinity. if (CFP->getValueAPF().isInfinity()) { @@ -3228,10 +3409,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, switch (Pred) { case FCmpInst::FCMP_OLT: // No value is ordered and less than negative infinity. - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); case FCmpInst::FCMP_UGE: // All values are unordered with or at least negative infinity. - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); default: break; } @@ -3239,10 +3420,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, switch (Pred) { case FCmpInst::FCMP_OGT: // No value is ordered and greater than infinity. - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); case FCmpInst::FCMP_ULE: // All values are unordered with and at most infinity. - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); default: break; } @@ -3252,12 +3433,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, switch (Pred) { case FCmpInst::FCMP_UGE: if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return ConstantInt::get(GetCompareTy(LHS), 1); + return getTrue(RetTy); break; case FCmpInst::FCMP_OLT: // X < 0 if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return ConstantInt::get(GetCompareTy(LHS), 0); + return getFalse(RetTy); break; default: break; @@ -3371,6 +3552,150 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return nullptr; } +/// Try to simplify a select instruction when its condition operand is an +/// integer comparison where one operand of the compare is a constant. +static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, + const APInt *Y, bool TrueWhenUnset) { + const APInt *C; + + // (X & Y) == 0 ? X & ~Y : X --> X + // (X & Y) != 0 ? X & ~Y : X --> X & ~Y + if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) && + *Y == ~*C) + return TrueWhenUnset ? FalseVal : TrueVal; + + // (X & Y) == 0 ? X : X & ~Y --> X & ~Y + // (X & Y) != 0 ? X : X & ~Y --> X + if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) && + *Y == ~*C) + return TrueWhenUnset ? FalseVal : TrueVal; + + if (Y->isPowerOf2()) { + // (X & Y) == 0 ? X | Y : X --> X | Y + // (X & Y) != 0 ? X | Y : X --> X + if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) && + *Y == *C) + return TrueWhenUnset ? TrueVal : FalseVal; + + // (X & Y) == 0 ? X : X | Y --> X + // (X & Y) != 0 ? 
X : X | Y --> X | Y + if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) && + *Y == *C) + return TrueWhenUnset ? TrueVal : FalseVal; + } + + return nullptr; +} + +/// An alternative way to test if a bit is set or not uses sgt/slt instead of +/// eq/ne. +static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, + Value *FalseVal, + bool TrueWhenUnset) { + unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); + if (!BitWidth) + return nullptr; + + APInt MinSignedValue; + Value *X; + if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) { + // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0 + // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0 + unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits(); + MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth); + } else { + // icmp slt X, 0 <--> icmp ne (and X, C), 0 + // icmp sgt X, -1 <--> icmp eq (and X, C), 0 + X = CmpLHS; + MinSignedValue = APInt::getSignedMinValue(BitWidth); + } + + if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue, + TrueWhenUnset)) + return V; + + return nullptr; +} + +/// Try to simplify a select instruction when its condition operand is an +/// integer comparison. +static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, + Value *FalseVal, const Query &Q, + unsigned MaxRecurse) { + ICmpInst::Predicate Pred; + Value *CmpLHS, *CmpRHS; + if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) + return nullptr; + + // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring + // decomposeBitTestICmp() might help. + if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) { + Value *X; + const APInt *Y; + if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y)))) + if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y, + Pred == ICmpInst::ICMP_EQ)) + return V; + } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { + // Comparing signed-less-than 0 checks if the sign bit is set. + if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal, + false)) + return V; + } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { + // Comparing signed-greater-than -1 checks if the sign bit is not set. + if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal, + true)) + return V; + } + + if (CondVal->hasOneUse()) { + const APInt *C; + if (match(CmpRHS, m_APInt(C))) { + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) + return FalseVal; + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) + return FalseVal; + // X > MAX ? T : F --> F + if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) + return FalseVal; + // X > MAX ? T : F --> F + if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) + return FalseVal; + } + } + + // If we have an equality comparison, then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. 
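The bit-test select identities factored out into simplifySelectBitTest above are easy to sanity-check exhaustively on a small bit width. The following standalone sketch (plain unsigned arithmetic instead of APInt; not part of the patch) verifies all four folds over every 8-bit X and Y:

#include <cassert>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 256; ++Y) {
      unsigned NotY = ~Y & 0xFFu;
      // (X & Y) == 0 ? X & ~Y : X --> X
      assert((((X & Y) == 0) ? (X & NotY) : X) == X);
      // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
      assert((((X & Y) != 0) ? (X & NotY) : X) == (X & NotY));
      bool IsPow2 = Y != 0 && (Y & (Y - 1)) == 0;
      if (IsPow2) {
        // (X & Y) == 0 ? X | Y : X --> X | Y
        assert((((X & Y) == 0) ? (X | Y) : X) == (X | Y));
        // (X & Y) != 0 ? X | Y : X --> X
        assert((((X & Y) != 0) ? (X | Y) : X) == X);
      }
    }
  }
  std::printf("all bit-test select identities hold for 8-bit values\n");
}

The power-of-two restriction matters only for the X | Y forms: with a single set bit in Y, (X & Y) != 0 already implies X | Y == X, which is what makes those two folds valid.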
+ if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return FalseVal; + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return FalseVal; + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return TrueVal; + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return TrueVal; + } + + return nullptr; +} + /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, @@ -3399,106 +3724,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) { - // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring - // decomposeBitTestICmp() might help. - unsigned BitWidth = - Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - ICmpInst::Predicate Pred = ICI->getPredicate(); - Value *CmpLHS = ICI->getOperand(0); - Value *CmpRHS = ICI->getOperand(1); - APInt MinSignedValue = APInt::getSignBit(BitWidth); - Value *X; - const APInt *Y; - bool TrueWhenUnset; - bool IsBitTest = false; - if (ICmpInst::isEquality(Pred) && - match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) && - match(CmpRHS, m_Zero())) { - IsBitTest = true; - TrueWhenUnset = Pred == ICmpInst::ICMP_EQ; - } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { - X = CmpLHS; - Y = &MinSignedValue; - IsBitTest = true; - TrueWhenUnset = false; - } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { - X = CmpLHS; - Y = &MinSignedValue; - IsBitTest = true; - TrueWhenUnset = true; - } - if (IsBitTest) { - const APInt *C; - // (X & Y) == 0 ? X & ~Y : X --> X - // (X & Y) != 0 ? X & ~Y : X --> X & ~Y - if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) && - *Y == ~*C) - return TrueWhenUnset ? FalseVal : TrueVal; - // (X & Y) == 0 ? X : X & ~Y --> X & ~Y - // (X & Y) != 0 ? X : X & ~Y --> X - if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) && - *Y == ~*C) - return TrueWhenUnset ? FalseVal : TrueVal; - - if (Y->isPowerOf2()) { - // (X & Y) == 0 ? X | Y : X --> X | Y - // (X & Y) != 0 ? X | Y : X --> X - if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) && - *Y == *C) - return TrueWhenUnset ? TrueVal : FalseVal; - // (X & Y) == 0 ? X : X | Y --> X - // (X & Y) != 0 ? X : X | Y --> X | Y - if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) && - *Y == *C) - return TrueWhenUnset ? TrueVal : FalseVal; - } - } - if (ICI->hasOneUse()) { - const APInt *C; - if (match(CmpRHS, m_APInt(C))) { - // X < MIN ? T : F --> F - if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) - return FalseVal; - // X < MIN ? T : F --> F - if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) - return FalseVal; - // X > MAX ? T : F --> F - if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) - return FalseVal; - // X > MAX ? 
T : F --> F - if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) - return FalseVal; - } - } - - // If we have an equality comparison then we know the value in one of the - // arms of the select. See if substituting this value into the arm and - // simplifying the result yields the same value as the other arm. - if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - TrueVal) - return FalseVal; - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - FalseVal) - return FalseVal; - } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - FalseVal) - return TrueVal; - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - TrueVal) - return TrueVal; - } - } + if (Value *V = + simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse)) + return V; return nullptr; } @@ -3587,6 +3815,32 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, } } + if (Q.DL.getTypeAllocSize(LastType) == 1 && + all_of(Ops.slice(1).drop_back(1), + [](Value *Idx) { return match(Idx, m_Zero()); })) { + unsigned PtrWidth = + Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); + if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) { + APInt BasePtrOffset(PtrWidth, 0); + Value *StrippedBasePtr = + Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, + BasePtrOffset); + + // gep (gep V, C), (sub 0, V) -> C + if (match(Ops.back(), + m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) { + auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); + return ConstantExpr::getIntToPtr(CI, GEPTy); + } + // gep (gep V, C), (xor V, -1) -> C-1 + if (match(Ops.back(), + m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) { + auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); + return ConstantExpr::getIntToPtr(CI, GEPTy); + } + } + } + // Check to see if this is constant foldable. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa<Constant>(Ops[i])) @@ -3742,19 +3996,47 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { return CommonValue; } -static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { - if (Constant *C = dyn_cast<Constant>(Op)) - return ConstantFoldCastOperand(Instruction::Trunc, C, Ty, Q.DL); +static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, + Type *Ty, const Query &Q, unsigned MaxRecurse) { + if (auto *C = dyn_cast<Constant>(Op)) + return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); + + if (auto *CI = dyn_cast<CastInst>(Op)) { + auto *Src = CI->getOperand(0); + Type *SrcTy = Src->getType(); + Type *MidTy = CI->getType(); + Type *DstTy = Ty; + if (Src->getType() == Ty) { + auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode()); + auto SecondOp = static_cast<Instruction::CastOps>(CastOpc); + Type *SrcIntPtrTy = + SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr; + Type *MidIntPtrTy = + MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr; + Type *DstIntPtrTy = + DstTy->isPtrOrPtrVectorTy() ? 
Q.DL.getIntPtrType(DstTy) : nullptr; + if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy, + SrcIntPtrTy, MidIntPtrTy, + DstIntPtrTy) == Instruction::BitCast) + return Src; + } + } + + // bitcast x -> x + if (CastOpc == Instruction::BitCast) + if (Op->getType() == Ty) + return Op; return nullptr; } -Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); } //=== Helper functions for higher up the class hierarchy. @@ -3837,6 +4119,8 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FMul: return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); + case Instruction::FDiv: + return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse); default: return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); } @@ -3968,14 +4252,36 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { Intrinsic::ID IID = F->getIntrinsicID(); unsigned NumOperands = std::distance(ArgBegin, ArgEnd); - Type *ReturnType = F->getReturnType(); + + // Unary Ops + if (NumOperands == 1) { + // Perform idempotent optimizations + if (IsIdempotent(IID)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) { + if (II->getIntrinsicID() == IID) + return II; + } + } + + switch (IID) { + case Intrinsic::fabs: { + if (SignBitMustBeZero(*ArgBegin, Q.TLI)) + return *ArgBegin; + } + default: + return nullptr; + } + } // Binary Ops if (NumOperands == 2) { Value *LHS = *ArgBegin; Value *RHS = *(ArgBegin + 1); - if (IID == Intrinsic::usub_with_overflow || - IID == Intrinsic::ssub_with_overflow) { + Type *ReturnType = F->getReturnType(); + + switch (IID) { + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: { // X - X -> { 0, false } if (LHS == RHS) return Constant::getNullValue(ReturnType); @@ -3984,17 +4290,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, // undef - X -> undef if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) return UndefValue::get(ReturnType); - } - if (IID == Intrinsic::uadd_with_overflow || - IID == Intrinsic::sadd_with_overflow) { + return nullptr; + } + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: { // X + undef -> undef if (isa<UndefValue>(RHS)) return UndefValue::get(ReturnType); - } - if (IID == Intrinsic::umul_with_overflow || - IID == Intrinsic::smul_with_overflow) { + return nullptr; + } + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: { // X * 0 -> { 0, false } if (match(RHS, m_Zero())) return Constant::getNullValue(ReturnType); @@ -4002,34 +4310,34 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, // X * undef -> { 0, false } if (match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); - } - if (IID == Intrinsic::load_relative && isa<Constant>(LHS) && - isa<Constant>(RHS)) - return SimplifyRelativeLoad(cast<Constant>(LHS), cast<Constant>(RHS), - Q.DL); + return nullptr; + } + case 
Intrinsic::load_relative: { + Constant *C0 = dyn_cast<Constant>(LHS); + Constant *C1 = dyn_cast<Constant>(RHS); + if (C0 && C1) + return SimplifyRelativeLoad(C0, C1, Q.DL); + return nullptr; + } + default: + return nullptr; + } } // Simplify calls to llvm.masked.load.* - if (IID == Intrinsic::masked_load) { + switch (IID) { + case Intrinsic::masked_load: { Value *MaskArg = ArgBegin[2]; Value *PassthruArg = ArgBegin[3]; // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; + return nullptr; } - - // Perform idempotent optimizations - if (!IsIdempotent(IID)) + default: return nullptr; - - // Unary Ops - if (NumOperands == 1) - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) - if (II->getIntrinsicID() == IID) - return II; - - return nullptr; + } } template <typename IterTy> @@ -4223,21 +4531,23 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, TLI, DT, AC, I); break; } - case Instruction::Trunc: - Result = - SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I); +#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: +#include "llvm/IR/Instruction.def" +#undef HANDLE_CAST_INST + Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), + DL, TLI, DT, AC, I); break; } // In general, it is possible for computeKnownBits to determine all bits in a // value even when the operands are not all constants. - if (!Result && I->getType()->isIntegerTy()) { + if (!Result && I->getType()->isIntOrIntVectorTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); if ((KnownZero | KnownOne).isAllOnesValue()) - Result = ConstantInt::get(I->getContext(), KnownOne); + Result = ConstantInt::get(I->getType(), KnownOne); } /// If called on unreachable code, the above logic may report that the diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp index 3ab6b5d..d1374acd 100644 --- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -/// \brief Compute iterated dominance frontiers using a linear time algorithm. +// Compute iterated dominance frontiers using a linear time algorithm. 
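Stepping back to the SimplifyInstruction hunk above (the one that switches to isIntOrIntVectorTy and ConstantInt::get(I->getType(), KnownOne)): the fold relies on the fact that once KnownZero | KnownOne covers every bit, the value is fully determined and must equal KnownOne. A minimal standalone sketch of that reasoning, using 8-bit masks instead of APInt (illustrative names, not LLVM API):

#include <cassert>
#include <cstdint>
#include <optional>

// If every bit is known (the two masks together cover the full width), the
// value can only be KnownOne; otherwise at least one bit is still free.
std::optional<uint8_t> foldFromKnownBits(uint8_t KnownZero, uint8_t KnownOne) {
  assert((KnownZero & KnownOne) == 0 && "a bit cannot be known 0 and 1 at once");
  if (static_cast<uint8_t>(KnownZero | KnownOne) == 0xFF)
    return KnownOne;
  return std::nullopt;
}

For example, KnownZero = 0xF0 and KnownOne = 0x0F together determine all eight bits, so the sketch returns 0x0F; if any bit is left unknown it returns nothing, mirroring the early exit in the hunk.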
// //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp index 7debfde..596b6fc 100644 --- a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyBlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" using namespace llvm; @@ -24,7 +24,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(LazyBlockFrequencyInfoPass, DEBUG_TYPE, "Lazy Block Frequency Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBPIPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LazyBlockFrequencyInfoPass, DEBUG_TYPE, "Lazy Block Frequency Analysis", true, true) @@ -40,7 +40,7 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const { } void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU); AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesAll(); } @@ -48,21 +48,20 @@ void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { void LazyBlockFrequencyInfoPass::releaseMemory() { LBFI.releaseMemory(); } bool LazyBlockFrequencyInfoPass::runOnFunction(Function &F) { - BranchProbabilityInfo &BPI = - getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + auto &BPIPass = getAnalysis<LazyBranchProbabilityInfoPass>(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - LBFI.setAnalysis(&F, &BPI, &LI); + LBFI.setAnalysis(&F, &BPIPass, &LI); return false; } void LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AnalysisUsage &AU) { - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU); AU.addRequired<LazyBlockFrequencyInfoPass>(); AU.addRequired<LoopInfoWrapperPass>(); } void llvm::initializeLazyBFIPassPass(PassRegistry &Registry) { - INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass); + initializeLazyBPIPassPass(Registry); INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass); INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); } diff --git a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp new file mode 100644 index 0000000..b51c6be --- /dev/null +++ b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -0,0 +1,63 @@ +//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass. +// The difference is that with this pass the branch probabilities are not +// computed when the analysis pass is executed but rather when the BPI results +// is explicitly requested by the analysis client. 
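The lazy-BPI design described in that header comment boils down to a compute-on-first-use wrapper: the pass only records what it would need to build the result, and the result is built the first time a client actually asks for it. A minimal generic sketch of the pattern (illustrative names only, not the actual LazyBranchProbabilityInfo class):

#include <functional>
#include <optional>
#include <utility>

template <typename ResultT> class LazyAnalysis {
  std::function<ResultT()> Compute;   // captured inputs, e.g. the function and LoopInfo
  std::optional<ResultT> Cached;      // built on first request only

public:
  explicit LazyAnalysis(std::function<ResultT()> C) : Compute(std::move(C)) {}

  // Mirrors the idea behind getCalculated(): compute lazily, then reuse.
  ResultT &getCalculated() {
    if (!Cached)
      Cached = Compute();
    return *Cached;
  }

  void releaseMemory() { Cached.reset(); }
};

Clients that never request the result pay nothing beyond storing the inputs, which is exactly the point of routing LazyBlockFrequencyInfo through this pass instead of the eager BranchProbabilityInfoWrapperPass.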
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LazyBranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "lazy-branch-prob" + +INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE, + "Lazy Branch Probability Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE, + "Lazy Branch Probability Analysis", true, true) + +char LazyBranchProbabilityInfoPass::ID = 0; + +LazyBranchProbabilityInfoPass::LazyBranchProbabilityInfoPass() + : FunctionPass(ID) { + initializeLazyBranchProbabilityInfoPassPass(*PassRegistry::getPassRegistry()); +} + +void LazyBranchProbabilityInfoPass::print(raw_ostream &OS, + const Module *) const { + LBPI->getCalculated().print(OS); +} + +void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfoWrapperPass>(); + AU.setPreservesAll(); +} + +void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } + +bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI); + return false; +} + +void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) { + AU.addRequired<LazyBranchProbabilityInfoPass>(); + AU.addRequired<LoopInfoWrapperPass>(); +} + +void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) { + INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass); + INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); +} diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index acff852..f7cf8c6 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -8,7 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -23,39 +26,11 @@ using namespace llvm; static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges, DenseMap<Function *, int> &EdgeIndexMap, Function &F, LazyCallGraph::Edge::Kind EK) { - // Note that we consider *any* function with a definition to be a viable - // edge. Even if the function's definition is subject to replacement by - // some other module (say, a weak definition) there may still be - // optimizations which essentially speculate based on the definition and - // a way to check that the specific definition is in fact the one being - // used. For example, this could be done by moving the weak definition to - // a strong (internal) definition and making the weak definition be an - // alias. Then a test of the address of the weak function against the new - // strong definition's address would be an effective way to determine the - // safety of optimizing a direct call edge. 
- if (!F.isDeclaration() && - EdgeIndexMap.insert({&F, Edges.size()}).second) { - DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n"); - Edges.emplace_back(LazyCallGraph::Edge(F, EK)); - } -} - -static void findReferences(SmallVectorImpl<Constant *> &Worklist, - SmallPtrSetImpl<Constant *> &Visited, - SmallVectorImpl<LazyCallGraph::Edge> &Edges, - DenseMap<Function *, int> &EdgeIndexMap) { - while (!Worklist.empty()) { - Constant *C = Worklist.pop_back_val(); - - if (Function *F = dyn_cast<Function>(C)) { - addEdge(Edges, EdgeIndexMap, *F, LazyCallGraph::Edge::Ref); - continue; - } + if (!EdgeIndexMap.insert({&F, Edges.size()}).second) + return; - for (Value *Op : C->operand_values()) - if (Visited.insert(cast<Constant>(Op)).second) - Worklist.push_back(cast<Constant>(Op)); - } + DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n"); + Edges.emplace_back(LazyCallGraph::Edge(F, EK)); } LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) @@ -72,14 +47,26 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) // are trivially added, but to accumulate the latter we walk the instructions // and add every operand which is a constant to the worklist to process // afterward. + // + // Note that we consider *any* function with a definition to be a viable + // edge. Even if the function's definition is subject to replacement by + // some other module (say, a weak definition) there may still be + // optimizations which essentially speculate based on the definition and + // a way to check that the specific definition is in fact the one being + // used. For example, this could be done by moving the weak definition to + // a strong (internal) definition and making the weak definition be an + // alias. Then a test of the address of the weak function against the new + // strong definition's address would be an effective way to determine the + // safety of optimizing a direct call edge. for (BasicBlock &BB : F) for (Instruction &I : BB) { if (auto CS = CallSite(&I)) if (Function *Callee = CS.getCalledFunction()) - if (Callees.insert(Callee).second) { - Visited.insert(Callee); - addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call); - } + if (!Callee->isDeclaration()) + if (Callees.insert(Callee).second) { + Visited.insert(Callee); + addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call); + } for (Value *Op : I.operand_values()) if (Constant *C = dyn_cast<Constant>(Op)) @@ -90,7 +77,9 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) // We've collected all the constant (and thus potentially function or // function containing) operands to all of the instructions in the function. // Process them (recursively) collecting every function found. 
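The constant walk described in the comment above is the usual worklist-plus-visited-set traversal, with the per-item action supplied as a callback (the lambda handed to visitReferences in the next hunk). A simplified standalone sketch of the shape of that walk, with plain ints standing in for Constant pointers and an OperandsOf function standing in for operand_values() (assumed names, and the callback here fires for every item rather than only for functions):

#include <functional>
#include <set>
#include <vector>

void visitReachable(std::vector<int> Worklist, std::set<int> &Visited,
                    const std::function<std::vector<int>(int)> &OperandsOf,
                    const std::function<void(int)> &Callback) {
  while (!Worklist.empty()) {
    int C = Worklist.back();
    Worklist.pop_back();
    Callback(C);                       // e.g. "add an edge if this is a function"
    for (int Op : OperandsOf(C))
      if (Visited.insert(Op).second)   // only queue operands never seen before
        Worklist.push_back(Op);
  }
}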
- findReferences(Worklist, Visited, Edges, EdgeIndexMap); + visitReferences(Worklist, Visited, [&](Function &F) { + addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref); + }); } void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) { @@ -144,7 +133,9 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { DEBUG(dbgs() << " Adding functions referenced by global initializers to the " "entry set.\n"); - findReferences(Worklist, Visited, EntryEdges, EntryIndexMap); + visitReferences(Worklist, Visited, [&](Function &F) { + addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref); + }); for (const Edge &E : EntryEdges) RefSCCEntryNodes.push_back(&E.getFunction()); @@ -199,6 +190,57 @@ void LazyCallGraph::SCC::verify() { } #endif +bool LazyCallGraph::SCC::isParentOf(const SCC &C) const { + if (this == &C) + return false; + + for (Node &N : *this) + for (Edge &E : N.calls()) + if (Node *CalleeN = E.getNode()) + if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C) + return true; + + // No edges found. + return false; +} + +bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const { + if (this == &TargetC) + return false; + + LazyCallGraph &G = *OuterRefSCC->G; + + // Start with this SCC. + SmallPtrSet<const SCC *, 16> Visited = {this}; + SmallVector<const SCC *, 16> Worklist = {this}; + + // Walk down the graph until we run out of edges or find a path to TargetC. + do { + const SCC &C = *Worklist.pop_back_val(); + for (Node &N : C) + for (Edge &E : N.calls()) { + Node *CalleeN = E.getNode(); + if (!CalleeN) + continue; + SCC *CalleeC = G.lookupSCC(*CalleeN); + if (!CalleeC) + continue; + + // If the callee's SCC is the TargetC, we're done. + if (CalleeC == &TargetC) + return true; + + // If this is the first time we've reached this SCC, put it on the + // worklist to recurse through. + if (Visited.insert(CalleeC).second) + Worklist.push_back(CalleeC); + } + } while (!Worklist.empty()); + + // No paths found. + return false; +} + LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {} void LazyCallGraph::RefSCC::dump() const { @@ -211,11 +253,17 @@ void LazyCallGraph::RefSCC::verify() { assert(!SCCs.empty() && "Can't have an empty SCC!"); // Verify basic properties of the SCCs. + SmallPtrSet<SCC *, 4> SCCSet; for (SCC *C : SCCs) { assert(C && "Can't have a null SCC!"); C->verify(); assert(&C->getOuterRefSCC() == this && "SCC doesn't think it is inside this RefSCC!"); + bool Inserted = SCCSet.insert(C).second; + assert(Inserted && "Found a duplicate SCC!"); + auto IndexIt = SCCIndices.find(C); + assert(IndexIt != SCCIndices.end() && + "Found an SCC that doesn't have an index!"); } // Check that our indices map correctly. @@ -223,6 +271,7 @@ void LazyCallGraph::RefSCC::verify() { SCC *C = SCCIndexPair.first; int i = SCCIndexPair.second; assert(C && "Can't have a null SCC in the indices!"); + assert(SCCSet.count(C) && "Found an index for an SCC not in the RefSCC!"); assert(SCCs[i] == C && "Index doesn't point to SCC!"); } @@ -243,6 +292,20 @@ void LazyCallGraph::RefSCC::verify() { "Edge to a RefSCC missing us in its parent set."); } } + + // Check that our parents are actually parents. 
+ for (RefSCC *ParentRC : Parents) { + assert(ParentRC != this && "Cannot be our own parent!"); + auto HasConnectingEdge = [&] { + for (SCC &C : *ParentRC) + for (Node &N : C) + for (Edge &E : N) + if (G->lookupRefSCC(*E.getNode()) == this) + return true; + return false; + }; + assert(HasConnectingEdge() && "No edge connects the parent to us!"); + } } #endif @@ -261,12 +324,153 @@ bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const { return false; } +/// Generic helper that updates a postorder sequence of SCCs for a potentially +/// cycle-introducing edge insertion. +/// +/// A postorder sequence of SCCs of a directed graph has one fundamental +/// property: all deges in the DAG of SCCs point "up" the sequence. That is, +/// all edges in the SCC DAG point to prior SCCs in the sequence. +/// +/// This routine both updates a postorder sequence and uses that sequence to +/// compute the set of SCCs connected into a cycle. It should only be called to +/// insert a "downward" edge which will require changing the sequence to +/// restore it to a postorder. +/// +/// When inserting an edge from an earlier SCC to a later SCC in some postorder +/// sequence, all of the SCCs which may be impacted are in the closed range of +/// those two within the postorder sequence. The algorithm used here to restore +/// the state is as follows: +/// +/// 1) Starting from the source SCC, construct a set of SCCs which reach the +/// source SCC consisting of just the source SCC. Then scan toward the +/// target SCC in postorder and for each SCC, if it has an edge to an SCC +/// in the set, add it to the set. Otherwise, the source SCC is not +/// a successor, move it in the postorder sequence to immediately before +/// the source SCC, shifting the source SCC and all SCCs in the set one +/// position toward the target SCC. Stop scanning after processing the +/// target SCC. +/// 2) If the source SCC is now past the target SCC in the postorder sequence, +/// and thus the new edge will flow toward the start, we are done. +/// 3) Otherwise, starting from the target SCC, walk all edges which reach an +/// SCC between the source and the target, and add them to the set of +/// connected SCCs, then recurse through them. Once a complete set of the +/// SCCs the target connects to is known, hoist the remaining SCCs between +/// the source and the target to be above the target. Note that there is no +/// need to process the source SCC, it is already known to connect. +/// 4) At this point, all of the SCCs in the closed range between the source +/// SCC and the target SCC in the postorder sequence are connected, +/// including the target SCC and the source SCC. Inserting the edge from +/// the source SCC to the target SCC will form a cycle out of precisely +/// these SCCs. Thus we can merge all of the SCCs in this closed range into +/// a single SCC. +/// +/// This process has various important properties: +/// - Only mutates the SCCs when adding the edge actually changes the SCC +/// structure. +/// - Never mutates SCCs which are unaffected by the change. +/// - Updates the postorder sequence to correctly satisfy the postorder +/// constraint after the edge is inserted. +/// - Only reorders SCCs in the closed postorder sequence from the source to +/// the target, so easy to bound how much has changed even in the ordering. +/// - Big-O is the number of edges in the closed postorder range of SCCs from +/// source to target. 
+/// +/// This helper routine, in addition to updating the postorder sequence itself +/// will also update a map from SCCs to indices within that sequecne. +/// +/// The sequence and the map must operate on pointers to the SCC type. +/// +/// Two callbacks must be provided. The first computes the subset of SCCs in +/// the postorder closed range from the source to the target which connect to +/// the source SCC via some (transitive) set of edges. The second computes the +/// subset of the same range which the target SCC connects to via some +/// (transitive) set of edges. Both callbacks should populate the set argument +/// provided. +template <typename SCCT, typename PostorderSequenceT, typename SCCIndexMapT, + typename ComputeSourceConnectedSetCallableT, + typename ComputeTargetConnectedSetCallableT> +static iterator_range<typename PostorderSequenceT::iterator> +updatePostorderSequenceForEdgeInsertion( + SCCT &SourceSCC, SCCT &TargetSCC, PostorderSequenceT &SCCs, + SCCIndexMapT &SCCIndices, + ComputeSourceConnectedSetCallableT ComputeSourceConnectedSet, + ComputeTargetConnectedSetCallableT ComputeTargetConnectedSet) { + int SourceIdx = SCCIndices[&SourceSCC]; + int TargetIdx = SCCIndices[&TargetSCC]; + assert(SourceIdx < TargetIdx && "Cannot have equal indices here!"); + + SmallPtrSet<SCCT *, 4> ConnectedSet; + + // Compute the SCCs which (transitively) reach the source. + ComputeSourceConnectedSet(ConnectedSet); + + // Partition the SCCs in this part of the port-order sequence so only SCCs + // connecting to the source remain between it and the target. This is + // a benign partition as it preserves postorder. + auto SourceI = std::stable_partition( + SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1, + [&ConnectedSet](SCCT *C) { return !ConnectedSet.count(C); }); + for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i) + SCCIndices.find(SCCs[i])->second = i; + + // If the target doesn't connect to the source, then we've corrected the + // post-order and there are no cycles formed. + if (!ConnectedSet.count(&TargetSCC)) { + assert(SourceI > (SCCs.begin() + SourceIdx) && + "Must have moved the source to fix the post-order."); + assert(*std::prev(SourceI) == &TargetSCC && + "Last SCC to move should have bene the target."); + + // Return an empty range at the target SCC indicating there is nothing to + // merge. + return make_range(std::prev(SourceI), std::prev(SourceI)); + } + + assert(SCCs[TargetIdx] == &TargetSCC && + "Should not have moved target if connected!"); + SourceIdx = SourceI - SCCs.begin(); + assert(SCCs[SourceIdx] == &SourceSCC && + "Bad updated index computation for the source SCC!"); + + + // See whether there are any remaining intervening SCCs between the source + // and target. If so we need to make sure they all are reachable form the + // target. + if (SourceIdx + 1 < TargetIdx) { + ConnectedSet.clear(); + ComputeTargetConnectedSet(ConnectedSet); + + // Partition SCCs so that only SCCs reached from the target remain between + // the source and the target. This preserves postorder. 
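Both partition steps in this helper rely on the same property: std::stable_partition keeps the relative order inside each group, so each group stays internally in postorder and only whole groups move past each other. A tiny standalone demonstration (ints and a set stand in for SCC pointers and the connected set):

#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

int main() {
  // A slice of the postorder sequence between a source and a target SCC.
  std::vector<int> Slice = {10, 11, 12, 13, 14};
  // The subset identified as connected by one of the callbacks.
  std::set<int> Connected = {10, 12, 14};

  // Hoist the unconnected entries in front of the connected ones; stability
  // keeps the original relative order within each group.
  std::stable_partition(Slice.begin(), Slice.end(),
                        [&](int C) { return !Connected.count(C); });

  for (int C : Slice)
    std::printf("%d ", C);   // prints: 11 13 10 12 14
  std::printf("\n");
}

Because no edge direction inside either group changes, the only edges that can now point the "wrong" way are the ones between the two groups, and those are exactly the edges the surrounding algorithm accounts for.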
+ auto TargetI = std::stable_partition( + SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1, + [&ConnectedSet](SCCT *C) { return ConnectedSet.count(C); }); + for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i) + SCCIndices.find(SCCs[i])->second = i; + TargetIdx = std::prev(TargetI) - SCCs.begin(); + assert(SCCs[TargetIdx] == &TargetSCC && + "Should always end with the target!"); + } + + // At this point, we know that connecting source to target forms a cycle + // because target connects back to source, and we know that all of the SCCs + // between the source and target in the postorder sequence participate in that + // cycle. + return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx); +} + SmallVector<LazyCallGraph::SCC *, 1> LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!"); - SmallVector<SCC *, 1> DeletedSCCs; +#ifndef NDEBUG + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); +#endif + SCC &SourceSCC = *G->lookupSCC(SourceN); SCC &TargetSCC = *G->lookupSCC(TargetN); @@ -274,10 +478,6 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // we've just added more connectivity. if (&SourceSCC == &TargetSCC) { SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); -#ifndef NDEBUG - // Check that the RefSCC is still valid. - verify(); -#endif return DeletedSCCs; } @@ -291,114 +491,44 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { int TargetIdx = SCCIndices[&TargetSCC]; if (TargetIdx < SourceIdx) { SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); -#ifndef NDEBUG - // Check that the RefSCC is still valid. - verify(); -#endif return DeletedSCCs; } - // When we do have an edge from an earlier SCC to a later SCC in the - // postorder sequence, all of the SCCs which may be impacted are in the - // closed range of those two within the postorder sequence. The algorithm to - // restore the state is as follows: - // - // 1) Starting from the source SCC, construct a set of SCCs which reach the - // source SCC consisting of just the source SCC. Then scan toward the - // target SCC in postorder and for each SCC, if it has an edge to an SCC - // in the set, add it to the set. Otherwise, the source SCC is not - // a successor, move it in the postorder sequence to immediately before - // the source SCC, shifting the source SCC and all SCCs in the set one - // position toward the target SCC. Stop scanning after processing the - // target SCC. - // 2) If the source SCC is now past the target SCC in the postorder sequence, - // and thus the new edge will flow toward the start, we are done. - // 3) Otherwise, starting from the target SCC, walk all edges which reach an - // SCC between the source and the target, and add them to the set of - // connected SCCs, then recurse through them. Once a complete set of the - // SCCs the target connects to is known, hoist the remaining SCCs between - // the source and the target to be above the target. Note that there is no - // need to process the source SCC, it is already known to connect. - // 4) At this point, all of the SCCs in the closed range between the source - // SCC and the target SCC in the postorder sequence are connected, - // including the target SCC and the source SCC. 
Inserting the edge from - // the source SCC to the target SCC will form a cycle out of precisely - // these SCCs. Thus we can merge all of the SCCs in this closed range into - // a single SCC. - // - // This process has various important properties: - // - Only mutates the SCCs when adding the edge actually changes the SCC - // structure. - // - Never mutates SCCs which are unaffected by the change. - // - Updates the postorder sequence to correctly satisfy the postorder - // constraint after the edge is inserted. - // - Only reorders SCCs in the closed postorder sequence from the source to - // the target, so easy to bound how much has changed even in the ordering. - // - Big-O is the number of edges in the closed postorder range of SCCs from - // source to target. - - assert(SourceIdx < TargetIdx && "Cannot have equal indices here!"); - SmallPtrSet<SCC *, 4> ConnectedSet; - // Compute the SCCs which (transitively) reach the source. - ConnectedSet.insert(&SourceSCC); - auto IsConnected = [&](SCC &C) { - for (Node &N : C) - for (Edge &E : N.calls()) { - assert(E.getNode() && "Must have formed a node within an SCC!"); - if (ConnectedSet.count(G->lookupSCC(*E.getNode()))) - return true; - } - - return false; - }; - - for (SCC *C : - make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1)) - if (IsConnected(*C)) - ConnectedSet.insert(C); - - // Partition the SCCs in this part of the port-order sequence so only SCCs - // connecting to the source remain between it and the target. This is - // a benign partition as it preserves postorder. - auto SourceI = std::stable_partition( - SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1, - [&ConnectedSet](SCC *C) { return !ConnectedSet.count(C); }); - for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i) - SCCIndices.find(SCCs[i])->second = i; - - // If the target doesn't connect to the source, then we've corrected the - // post-order and there are no cycles formed. - if (!ConnectedSet.count(&TargetSCC)) { - assert(SourceI > (SCCs.begin() + SourceIdx) && - "Must have moved the source to fix the post-order."); - assert(*std::prev(SourceI) == &TargetSCC && - "Last SCC to move should have bene the target."); - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) { #ifndef NDEBUG + // Check that the RefSCC is still valid before computing this as the + // results will be nonsensical of we've broken its invariants. verify(); #endif - return DeletedSCCs; - } + ConnectedSet.insert(&SourceSCC); + auto IsConnected = [&](SCC &C) { + for (Node &N : C) + for (Edge &E : N.calls()) { + assert(E.getNode() && "Must have formed a node within an SCC!"); + if (ConnectedSet.count(G->lookupSCC(*E.getNode()))) + return true; + } - assert(SCCs[TargetIdx] == &TargetSCC && - "Should not have moved target if connected!"); - SourceIdx = SourceI - SCCs.begin(); + return false; + }; + + for (SCC *C : + make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1)) + if (IsConnected(*C)) + ConnectedSet.insert(C); + }; + // Use a normal worklist to find which SCCs the target connects to. We still + // bound the search based on the range in the postorder list we care about, + // but because this is forward connectivity we just "recurse" through the + // edges. + auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) { #ifndef NDEBUG - // Check that the RefSCC is still valid. 
- verify(); + // Check that the RefSCC is still valid before computing this as the + // results will be nonsensical of we've broken its invariants. + verify(); #endif - - // See whether there are any remaining intervening SCCs between the source - // and target. If so we need to make sure they all are reachable form the - // target. - if (SourceIdx + 1 < TargetIdx) { - // Use a normal worklist to find which SCCs the target connects to. We still - // bound the search based on the range in the postorder list we care about, - // but because this is forward connectivity we just "recurse" through the - // edges. - ConnectedSet.clear(); ConnectedSet.insert(&TargetSCC); SmallVector<SCC *, 4> Worklist; Worklist.push_back(&TargetSCC); @@ -421,35 +551,36 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { Worklist.push_back(&EdgeC); } } while (!Worklist.empty()); + }; - // Partition SCCs so that only SCCs reached from the target remain between - // the source and the target. This preserves postorder. - auto TargetI = std::stable_partition( - SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1, - [&ConnectedSet](SCC *C) { return ConnectedSet.count(C); }); - for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i) - SCCIndices.find(SCCs[i])->second = i; - TargetIdx = std::prev(TargetI) - SCCs.begin(); - assert(SCCs[TargetIdx] == &TargetSCC && - "Should always end with the target!"); + // Use a generic helper to update the postorder sequence of SCCs and return + // a range of any SCCs connected into a cycle by inserting this edge. This + // routine will also take care of updating the indices into the postorder + // sequence. + auto MergeRange = updatePostorderSequenceForEdgeInsertion( + SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet, + ComputeTargetConnectedSet); + + // If the merge range is empty, then adding the edge didn't actually form any + // new cycles. We're done. + if (MergeRange.begin() == MergeRange.end()) { + // Now that the SCC structure is finalized, flip the kind to call. + SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + return DeletedSCCs; + } #ifndef NDEBUG - // Check that the RefSCC is still valid. - verify(); + // Before merging, check that the RefSCC remains valid after all the + // postorder updates. + verify(); #endif - } - // At this point, we know that connecting source to target forms a cycle - // because target connects back to source, and we know that all of the SCCs - // between the source and target in the postorder sequence participate in that - // cycle. This means that we need to merge all of these SCCs into a single + // Otherwise we need to merge all of the SCCs in the cycle into a single // result SCC. // // NB: We merge into the target because all of these functions were already // reachable from the target, meaning any SCC-wide properties deduced about it // other than the set of functions within it will not have changed. - auto MergeRange = - make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx); for (SCC *C : MergeRange) { assert(C != &TargetSCC && "We merge *into* the target and shouldn't process it here!"); @@ -471,37 +602,55 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // Now that the SCC structure is finalized, flip the kind to call. SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); -#ifndef NDEBUG - // And we're done! Verify in debug builds that the RefSCC is coherent. - verify(); -#endif + // And we're done! 
return DeletedSCCs; } -void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, - Node &TargetN) { +void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, + Node &TargetN) { assert(SourceN[TargetN].isCall() && "Must start with a call edge!"); - SCC &SourceSCC = *G->lookupSCC(SourceN); - SCC &TargetSCC = *G->lookupSCC(TargetN); +#ifndef NDEBUG + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); +#endif - assert(&SourceSCC.getOuterRefSCC() == this && + assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); - assert(&TargetSCC.getOuterRefSCC() == this && + assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC."); + assert(G->lookupSCC(SourceN) != G->lookupSCC(TargetN) && + "Source and Target must be in separate SCCs for this to be trivial!"); // Set the edge kind. SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref); +} + +iterator_range<LazyCallGraph::RefSCC::iterator> +LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { + assert(SourceN[TargetN].isCall() && "Must start with a call edge!"); - // If this call edge is just connecting two separate SCCs within this RefSCC, - // there is nothing to do. - if (&SourceSCC != &TargetSCC) { #ifndef NDEBUG - // Check that the RefSCC is still valid. - verify(); + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); #endif - return; - } + + assert(G->lookupRefSCC(SourceN) == this && + "Source must be in this RefSCC."); + assert(G->lookupRefSCC(TargetN) == this && + "Target must be in this RefSCC."); + + SCC &TargetSCC = *G->lookupSCC(TargetN); + assert(G->lookupSCC(SourceN) == &TargetSCC && "Source and Target must be in " + "the same SCC to require the " + "full CG update."); + + // Set the edge kind. + SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref); // Otherwise we are removing a call edge from a single SCC. This may break // the cycle. In order to compute the new set of SCCs, we need to do a small @@ -635,10 +784,9 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, // root DFS number. auto SCCNodes = make_range( PendingSCCStack.rbegin(), - std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(), - [RootDFSNumber](Node *N) { - return N->DFSNumber < RootDFSNumber; - })); + find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) { + return N->DFSNumber < RootDFSNumber; + })); // Form a new SCC out of these nodes and then clear them off our pending // stack. @@ -663,10 +811,8 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, for (int Idx = OldIdx, Size = SCCs.size(); Idx < Size; ++Idx) SCCIndices[SCCs[Idx]] = Idx; -#ifndef NDEBUG - // We're done. Check the validity on our way out. - verify(); -#endif + return make_range(SCCs.begin() + OldIdx, + SCCs.begin() + OldIdx + NewSCCs.size()); } void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN, @@ -746,112 +892,113 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN, SmallVector<LazyCallGraph::RefSCC *, 1> LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { - assert(G->lookupRefSCC(TargetN) == this && "Target must be in this SCC."); - - // We store the RefSCCs found to be connected in postorder so that we can use - // that when merging. 
We also return this to the caller to allow them to - // invalidate information pertaining to these RefSCCs. - SmallVector<RefSCC *, 1> Connected; - + assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC."); RefSCC &SourceC = *G->lookupRefSCC(SourceN); - assert(&SourceC != this && "Source must not be in this SCC."); + assert(&SourceC != this && "Source must not be in this RefSCC."); assert(SourceC.isDescendantOf(*this) && "Source must be a descendant of the Target."); - // The algorithm we use for merging SCCs based on the cycle introduced here - // is to walk the RefSCC inverted DAG formed by the parent sets. The inverse - // graph has the same cycle properties as the actual DAG of the RefSCCs, and - // when forming RefSCCs lazily by a DFS, the bottom of the graph won't exist - // in many cases which should prune the search space. - // - // FIXME: We can get this pruning behavior even after the incremental RefSCC - // formation by leaving behind (conservative) DFS numberings in the nodes, - // and pruning the search with them. These would need to be cleverly updated - // during the removal of intra-SCC edges, but could be preserved - // conservatively. - // - // FIXME: This operation currently creates ordering stability problems - // because we don't use stably ordered containers for the parent SCCs. - - // The set of RefSCCs that are connected to the parent, and thus will - // participate in the merged connected component. - SmallPtrSet<RefSCC *, 8> ConnectedSet; - ConnectedSet.insert(this); - - // We build up a DFS stack of the parents chains. - SmallVector<std::pair<RefSCC *, parent_iterator>, 8> DFSStack; - SmallPtrSet<RefSCC *, 8> Visited; - int ConnectedDepth = -1; - DFSStack.push_back({&SourceC, SourceC.parent_begin()}); - do { - auto DFSPair = DFSStack.pop_back_val(); - RefSCC *C = DFSPair.first; - parent_iterator I = DFSPair.second; - auto E = C->parent_end(); + SmallVector<RefSCC *, 1> DeletedRefSCCs; - while (I != E) { - RefSCC &Parent = *I++; - - // If we have already processed this parent SCC, skip it, and remember - // whether it was connected so we don't have to check the rest of the - // stack. This also handles when we reach a child of the 'this' SCC (the - // callee) which terminates the search. - if (ConnectedSet.count(&Parent)) { - assert(ConnectedDepth < (int)DFSStack.size() && - "Cannot have a connected depth greater than the DFS depth!"); - ConnectedDepth = DFSStack.size(); - continue; +#ifndef NDEBUG + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); +#endif + + int SourceIdx = G->RefSCCIndices[&SourceC]; + int TargetIdx = G->RefSCCIndices[this]; + assert(SourceIdx < TargetIdx && + "Postorder list doesn't see edge as incoming!"); + + // Compute the RefSCCs which (transitively) reach the source. We do this by + // working backwards from the source using the parent set in each RefSCC, + // skipping any RefSCCs that don't fall in the postorder range. This has the + // advantage of walking the sparser parent edge (in high fan-out graphs) but + // more importantly this removes examining all forward edges in all RefSCCs + // within the postorder range which aren't in fact connected. Only connected + // RefSCCs (and their edges) are visited here. 
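The backward walk sketched in that comment is a plain worklist over parent sets, pruned by postorder index so the search never leaves the source-to-target window. A standalone approximation with integer ids (assumed helper shapes; the real code works on RefSCC pointers via G->getRefSCCIndex, and this sketch assumes every id has an index entry):

#include <map>
#include <set>
#include <vector>

// Collect everything that reaches Source by following parent edges, ignoring
// any node whose postorder index is greater than TargetIdx.
std::set<int> connectedToSource(int Source, int TargetIdx,
                                const std::map<int, std::vector<int>> &Parents,
                                const std::map<int, int> &PostorderIdx) {
  std::set<int> Set = {Source};
  std::vector<int> Worklist = {Source};
  while (!Worklist.empty()) {
    int RC = Worklist.back();
    Worklist.pop_back();
    auto It = Parents.find(RC);
    if (It == Parents.end())
      continue;
    for (int ParentRC : It->second) {
      if (PostorderIdx.at(ParentRC) > TargetIdx)
        continue;                        // outside the affected postorder range
      if (Set.insert(ParentRC).second)   // first edge reaching this parent
        Worklist.push_back(ParentRC);
    }
  }
  return Set;
}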
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) { + Set.insert(&SourceC); + SmallVector<RefSCC *, 4> Worklist; + Worklist.push_back(&SourceC); + do { + RefSCC &RC = *Worklist.pop_back_val(); + for (RefSCC &ParentRC : RC.parents()) { + // Skip any RefSCCs outside the range of source to target in the + // postorder sequence. + int ParentIdx = G->getRefSCCIndex(ParentRC); + assert(ParentIdx > SourceIdx && "Parent cannot precede source in postorder!"); + if (ParentIdx > TargetIdx) + continue; + if (Set.insert(&ParentRC).second) + // First edge connecting to this parent, add it to our worklist. + Worklist.push_back(&ParentRC); } - if (Visited.count(&Parent)) - continue; + } while (!Worklist.empty()); + }; - // We fully explore the depth-first space, adding nodes to the connected - // set only as we pop them off, so "recurse" by rotating to the parent. - DFSStack.push_back({C, I}); - C = &Parent; - I = C->parent_begin(); - E = C->parent_end(); - } + // Use a normal worklist to find which SCCs the target connects to. We still + // bound the search based on the range in the postorder list we care about, + // but because this is forward connectivity we just "recurse" through the + // edges. + auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) { + Set.insert(this); + SmallVector<RefSCC *, 4> Worklist; + Worklist.push_back(this); + do { + RefSCC &RC = *Worklist.pop_back_val(); + for (SCC &C : RC) + for (Node &N : C) + for (Edge &E : N) { + assert(E.getNode() && "Must have formed a node!"); + RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode()); + if (G->getRefSCCIndex(EdgeRC) <= SourceIdx) + // Not in the postorder sequence between source and target. + continue; + + if (Set.insert(&EdgeRC).second) + Worklist.push_back(&EdgeRC); + } + } while (!Worklist.empty()); + }; - // If we've found a connection anywhere below this point on the stack (and - // thus up the parent graph from the caller), the current node needs to be - // added to the connected set now that we've processed all of its parents. - if ((int)DFSStack.size() == ConnectedDepth) { - --ConnectedDepth; // We're finished with this connection. - bool Inserted = ConnectedSet.insert(C).second; - (void)Inserted; - assert(Inserted && "Cannot insert a refSCC multiple times!"); - Connected.push_back(C); - } else { - // Otherwise remember that its parents don't ever connect. - assert(ConnectedDepth < (int)DFSStack.size() && - "Cannot have a connected depth greater than the DFS depth!"); - Visited.insert(C); - } - } while (!DFSStack.empty()); + // Use a generic helper to update the postorder sequence of RefSCCs and return + // a range of any RefSCCs connected into a cycle by inserting this edge. This + // routine will also take care of updating the indices into the postorder + // sequence. + iterator_range<SmallVectorImpl<RefSCC *>::iterator> MergeRange = + updatePostorderSequenceForEdgeInsertion( + SourceC, *this, G->PostOrderRefSCCs, G->RefSCCIndices, + ComputeSourceConnectedSet, ComputeTargetConnectedSet); + + // Build a set so we can do fast tests for whether a RefSCC will end up as + // part of the merged RefSCC. + SmallPtrSet<RefSCC *, 16> MergeSet(MergeRange.begin(), MergeRange.end()); + + // This RefSCC will always be part of that set, so just insert it here. + MergeSet.insert(this); // Now that we have identified all of the SCCs which need to be merged into // a connected set with the inserted edge, merge all of them into this SCC. 
- // We walk the newly connected RefSCCs in the reverse postorder of the parent - // DAG walk above and merge in each of their SCC postorder lists. This - // ensures a merged postorder SCC list. SmallVector<SCC *, 16> MergedSCCs; int SCCIndex = 0; - for (RefSCC *C : reverse(Connected)) { - assert(C != this && - "This RefSCC should terminate the DFS without being reached."); + for (RefSCC *RC : MergeRange) { + assert(RC != this && "We're merging into the target RefSCC, so it " + "shouldn't be in the range."); // Merge the parents which aren't part of the merge into the our parents. - for (RefSCC *ParentC : C->Parents) - if (!ConnectedSet.count(ParentC)) - Parents.insert(ParentC); - C->Parents.clear(); + for (RefSCC *ParentRC : RC->Parents) + if (!MergeSet.count(ParentRC)) + Parents.insert(ParentRC); + RC->Parents.clear(); // Walk the inner SCCs to update their up-pointer and walk all the edges to // update any parent sets. // FIXME: We should try to find a way to avoid this (rather expensive) edge // walk by updating the parent sets in some other manner. - for (SCC &InnerC : *C) { + for (SCC &InnerC : *RC) { InnerC.OuterRefSCC = this; SCCIndices[&InnerC] = SCCIndex++; for (Node &N : InnerC) { @@ -860,9 +1007,9 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { assert(E.getNode() && "Cannot have a null node within a visited SCC!"); RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode()); - if (ConnectedSet.count(&ChildRC)) + if (MergeSet.count(&ChildRC)) continue; - ChildRC.Parents.erase(C); + ChildRC.Parents.erase(RC); ChildRC.Parents.insert(this); } } @@ -871,33 +1018,37 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { // Now merge in the SCCs. We can actually move here so try to reuse storage // the first time through. if (MergedSCCs.empty()) - MergedSCCs = std::move(C->SCCs); + MergedSCCs = std::move(RC->SCCs); else - MergedSCCs.append(C->SCCs.begin(), C->SCCs.end()); - C->SCCs.clear(); + MergedSCCs.append(RC->SCCs.begin(), RC->SCCs.end()); + RC->SCCs.clear(); + DeletedRefSCCs.push_back(RC); } - // Finally append our original SCCs to the merged list and move it into - // place. + // Append our original SCCs to the merged list and move it into place. for (SCC &InnerC : *this) SCCIndices[&InnerC] = SCCIndex++; MergedSCCs.append(SCCs.begin(), SCCs.end()); SCCs = std::move(MergedSCCs); + // Remove the merged away RefSCCs from the post order sequence. + for (RefSCC *RC : MergeRange) + G->RefSCCIndices.erase(RC); + int IndexOffset = MergeRange.end() - MergeRange.begin(); + auto EraseEnd = + G->PostOrderRefSCCs.erase(MergeRange.begin(), MergeRange.end()); + for (RefSCC *RC : make_range(EraseEnd, G->PostOrderRefSCCs.end())) + G->RefSCCIndices[RC] -= IndexOffset; + // At this point we have a merged RefSCC with a post-order SCCs list, just // connect the nodes to form the new edge. SourceN.insertEdgeInternal(TargetN, Edge::Ref); -#ifndef NDEBUG - // Check that the RefSCC is still valid. - verify(); -#endif - // We return the list of SCCs which were merged so that callers can // invalidate any data they have associated with those SCCs. Note that these // SCCs are no longer in an interesting state (they are totally empty) but // the pointers will remain stable for the life of the graph itself. 
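Erasing the merged RefSCCs from the global postorder list (a couple of hunks up) follows a small but easy-to-get-wrong pattern: drop the merged entries from the index map, erase the contiguous range from the ordered vector, then shift every index that followed the range. A standalone sketch with strings standing in for RefSCC pointers (illustrative names only):

#include <string>
#include <unordered_map>
#include <vector>

void eraseMergedRange(std::vector<std::string> &PostOrder,
                      std::unordered_map<std::string, int> &Indices,
                      int Begin, int End) {
  // The merged entries no longer exist as separate components.
  for (int i = Begin; i != End; ++i)
    Indices.erase(PostOrder[i]);
  PostOrder.erase(PostOrder.begin() + Begin, PostOrder.begin() + End);
  // Everything that used to sit after the range moved left by (End - Begin);
  // renumbering from Begin restores the invariant Indices[PostOrder[i]] == i.
  for (int i = Begin, Size = (int)PostOrder.size(); i != Size; ++i)
    Indices[PostOrder[i]] = i;
}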
- return Connected; + return DeletedRefSCCs; } void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { @@ -907,10 +1058,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { RefSCC &TargetRC = *G->lookupRefSCC(TargetN); assert(&TargetRC != this && "The target must not be a member of this RefSCC"); - assert(std::find(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this) == - G->LeafRefSCCs.end() && + assert(!is_contained(G->LeafRefSCCs, this) && "Cannot have a leaf RefSCC source."); +#ifndef NDEBUG + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); +#endif + // First remove it from the node. SourceN.removeEdgeInternal(TargetN.getFunction()); @@ -962,6 +1119,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { assert(!SourceN[TargetN].isCall() && "Cannot remove a call edge, it must first be made a ref edge"); +#ifndef NDEBUG + // In a debug build, verify the RefSCC is valid to start with and when this + // routine finishes. + verify(); + auto VerifyOnExit = make_scope_exit([&]() { verify(); }); +#endif + // First remove the actual edge. SourceN.removeEdgeInternal(TargetN.getFunction()); @@ -972,6 +1136,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { if (&SourceN == &TargetN) return Result; + // If this ref edge is within an SCC then there are sufficient other edges to + // form a cycle without this edge so removing it is a no-op. + SCC &SourceC = *G->lookupSCC(SourceN); + SCC &TargetC = *G->lookupSCC(TargetN); + if (&SourceC == &TargetC) + return Result; + // We build somewhat synthetic new RefSCCs by providing a postorder mapping // for each inner SCC. We also store these associated with *nodes* rather // than SCCs because this saves a round-trip through the node->SCC map and in @@ -994,7 +1165,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { // and handle participants in that cycle without walking all the edges that // form the connections, and instead by relying on the fundamental guarantee // coming into this operation. - SCC &TargetC = *G->lookupSCC(TargetN); for (Node &N : TargetC) PostOrderMapping[&N] = RootPostOrderNumber; @@ -1082,9 +1252,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { } // If this child isn't currently in this RefSCC, no need to process - // it. - // However, we do need to remove this RefSCC from its RefSCC's parent - // set. + // it. However, we do need to remove this RefSCC from its RefSCC's + // parent set. RefSCC &ChildRC = *G->lookupRefSCC(ChildN); ChildRC.Parents.erase(this); ++I; @@ -1121,10 +1290,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { // root DFS number. auto RefSCCNodes = make_range( PendingRefSCCStack.rbegin(), - std::find_if(PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(), - [RootDFSNumber](Node *N) { - return N->DFSNumber < RootDFSNumber; - })); + find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) { + return N->DFSNumber < RootDFSNumber; + })); // Mark the postorder number for these nodes and clear them off the // stack. 
We'll use the postorder number to pull them into RefSCCs at the @@ -1149,6 +1317,25 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { for (int i = 1; i < PostOrderNumber; ++i) Result.push_back(G->createRefSCC(*G)); + // Insert the resulting postorder sequence into the global graph postorder + // sequence before the current RefSCC in that sequence. The idea being that + // this RefSCC is the target of the reference edge removed, and thus has + // a direct or indirect edge to every other RefSCC formed and so must be at + // the end of any postorder traversal. + // + // FIXME: It'd be nice to change the APIs so that we returned an iterator + // range over the global postorder sequence and generally use that sequence + // rather than building a separate result vector here. + if (!Result.empty()) { + int Idx = G->getRefSCCIndex(*this); + G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx, + Result.begin(), Result.end()); + for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size())) + G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i; + assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this && + "Failed to update this RefSCC's index after insertion!"); + } + for (SCC *C : SCCs) { auto PostOrderI = PostOrderMapping.find(&*C->begin()); assert(PostOrderI != PostOrderMapping.end() && @@ -1166,7 +1353,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { RefSCC &RC = *Result[SCCNumber - 1]; int SCCIndex = RC.SCCs.size(); RC.SCCs.push_back(C); - SCCIndices[C] = SCCIndex; + RC.SCCIndices[C] = SCCIndex; C->OuterRefSCC = &RC; } @@ -1178,12 +1365,15 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { G->connectRefSCC(*RC); // Now erase all but the root's SCCs. - SCCs.erase(std::remove_if(SCCs.begin(), SCCs.end(), - [&](SCC *C) { - return PostOrderMapping.lookup(&*C->begin()) != - RootPostOrderNumber; - }), + SCCs.erase(remove_if(SCCs, + [&](SCC *C) { + return PostOrderMapping.lookup(&*C->begin()) != + RootPostOrderNumber; + }), SCCs.end()); + SCCIndices.clear(); + for (int i = 0, Size = SCCs.size(); i < Size; ++i) + SCCIndices[SCCs[i]] = i; #ifndef NDEBUG // Now we need to reconnect the current (root) SCC to the graph. We do this @@ -1207,11 +1397,24 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { if (!Result.empty()) assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new " "SCCs by removing this edge."); - if (!std::any_of(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), - [&](RefSCC *C) { return C == this; })) + if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; })) assert(!IsLeaf && "This SCC cannot be a leaf as it already had child " "SCCs before we removed this edge."); #endif + // And connect both this RefSCC and all the new ones to the correct parents. + // The easiest way to do this is just to re-analyze the old parent set. + SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end()); + Parents.clear(); + for (RefSCC *ParentRC : OldParents) + for (SCC &ParentC : *ParentRC) + for (Node &ParentN : ParentC) + for (Edge &E : ParentN) { + assert(E.getNode() && "Cannot have a missing node in a visited SCC!"); + RefSCC &RC = *G->lookupRefSCC(*E.getNode()); + if (&RC != ParentRC) + RC.Parents.insert(ParentRC); + } + // If this SCC stopped being a leaf through this edge removal, remove it from // the leaf SCC list. Note that this DTRT in the case where this was never // a leaf. 
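After the split, the hunk above compacts SCCs down to the members that stay with the root RefSCC and then rebuilds SCCIndices from scratch, because every survivor's position may have changed. The same erase-remove-then-reindex pattern in standalone form (SCCLike and PostOrderNumber are placeholders rather than the LLVM types):

#include <algorithm>
#include <unordered_map>
#include <vector>

struct SCCLike { int PostOrderNumber; };

// Keep only the components whose postorder number matches the root, then
// rebuild the dense index map so it matches the compacted vector.
void keepRootComponents(std::vector<SCCLike *> &SCCs,
                        std::unordered_map<SCCLike *, int> &SCCIndices,
                        int RootPostOrderNumber) {
  SCCs.erase(std::remove_if(SCCs.begin(), SCCs.end(),
                            [&](SCCLike *C) {
                              return C->PostOrderNumber != RootPostOrderNumber;
                            }),
             SCCs.end());
  SCCIndices.clear();
  for (int i = 0, Size = (int)SCCs.size(); i < Size; ++i)
    SCCIndices[SCCs[i]] = i;
}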
@@ -1222,10 +1425,93 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this), G->LeafRefSCCs.end()); +#ifndef NDEBUG + // Verify all of the new RefSCCs. + for (RefSCC *RC : Result) + RC->verify(); +#endif + // Return the new list of SCCs. return Result; } +void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN, + Node &TargetN) { + // The only trivial case that requires any graph updates is when we add new + // ref edge and may connect different RefSCCs along that path. This is only + // because of the parents set. Every other part of the graph remains constant + // after this edge insertion. + assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); + RefSCC &TargetRC = *G->lookupRefSCC(TargetN); + if (&TargetRC == this) { + + return; + } + + assert(TargetRC.isDescendantOf(*this) && + "Target must be a descendant of the Source."); + // The only change required is to add this RefSCC to the parent set of the + // target. This is a set and so idempotent if the edge already existed. + TargetRC.Parents.insert(this); +} + +void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN, + Node &TargetN) { +#ifndef NDEBUG + // Check that the RefSCC is still valid when we finish. + auto ExitVerifier = make_scope_exit([this] { verify(); }); + + // Check that we aren't breaking some invariants of the SCC graph. + SCC &SourceC = *G->lookupSCC(SourceN); + SCC &TargetC = *G->lookupSCC(TargetN); + if (&SourceC != &TargetC) + assert(SourceC.isAncestorOf(TargetC) && + "Call edge is not trivial in the SCC graph!"); +#endif + // First insert it into the source or find the existing edge. + auto InsertResult = SourceN.EdgeIndexMap.insert( + {&TargetN.getFunction(), SourceN.Edges.size()}); + if (!InsertResult.second) { + // Already an edge, just update it. + Edge &E = SourceN.Edges[InsertResult.first->second]; + if (E.isCall()) + return; // Nothing to do! + E.setKind(Edge::Call); + } else { + // Create the new edge. + SourceN.Edges.emplace_back(TargetN, Edge::Call); + } + + // Now that we have the edge, handle the graph fallout. + handleTrivialEdgeInsertion(SourceN, TargetN); +} + +void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) { +#ifndef NDEBUG + // Check that the RefSCC is still valid when we finish. + auto ExitVerifier = make_scope_exit([this] { verify(); }); + + // Check that we aren't breaking some invariants of the RefSCC graph. + RefSCC &SourceRC = *G->lookupRefSCC(SourceN); + RefSCC &TargetRC = *G->lookupRefSCC(TargetN); + if (&SourceRC != &TargetRC) + assert(SourceRC.isAncestorOf(TargetRC) && + "Ref edge is not trivial in the RefSCC graph!"); +#endif + // First insert it into the source or find the existing edge. + auto InsertResult = SourceN.EdgeIndexMap.insert( + {&TargetN.getFunction(), SourceN.Edges.size()}); + if (!InsertResult.second) + // Already an edge, we're done. + return; + + // Create the new edge. + SourceN.Edges.emplace_back(TargetN, Edge::Ref); + + // Now that we have the edge, handle the graph fallout. 
+ handleTrivialEdgeInsertion(SourceN, TargetN); +} + void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) { assert(SCCMap.empty() && DFSStack.empty() && "This method cannot be called after SCCs have been formed!"); @@ -1240,6 +1526,93 @@ void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) { return SourceN.removeEdgeInternal(Target); } +void LazyCallGraph::removeDeadFunction(Function &F) { + // FIXME: This is unnecessarily restrictive. We should be able to remove + // functions which recursively call themselves. + assert(F.use_empty() && + "This routine should only be called on trivially dead functions!"); + + auto EII = EntryIndexMap.find(&F); + if (EII != EntryIndexMap.end()) { + EntryEdges[EII->second] = Edge(); + EntryIndexMap.erase(EII); + } + + // It's safe to just remove un-visited functions from the RefSCC entry list. + // FIXME: This is a linear operation which could become hot and benefit from + // an index map. + auto RENI = find(RefSCCEntryNodes, &F); + if (RENI != RefSCCEntryNodes.end()) + RefSCCEntryNodes.erase(RENI); + + auto NI = NodeMap.find(&F); + if (NI == NodeMap.end()) + // Not in the graph at all! + return; + + Node &N = *NI->second; + NodeMap.erase(NI); + + if (SCCMap.empty() && DFSStack.empty()) { + // No SCC walk has begun, so removing this is fine and there is nothing + // else necessary at this point but clearing out the node. + N.clear(); + return; + } + + // Check that we aren't going to break the DFS walk. + assert(all_of(DFSStack, + [&N](const std::pair<Node *, edge_iterator> &Element) { + return Element.first != &N; + }) && + "Tried to remove a function currently in the DFS stack!"); + assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() && + "Tried to remove a function currently pending to add to a RefSCC!"); + + // Cannot remove a function which has yet to be visited in the DFS walk, so + // if we have a node at all then we must have an SCC and RefSCC. + auto CI = SCCMap.find(&N); + assert(CI != SCCMap.end() && + "Tried to remove a node without an SCC after DFS walk started!"); + SCC &C = *CI->second; + SCCMap.erase(CI); + RefSCC &RC = C.getOuterRefSCC(); + + // This node must be the only member of its SCC as it has no callers, and + // that SCC must be the only member of a RefSCC as it has no references. + // Validate these properties first. + assert(C.size() == 1 && "Dead functions must be in a singular SCC"); + assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC"); + assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!"); + + // Now remove this RefSCC from any parents sets and the leaf list. + for (Edge &E : N) + if (Node *TargetN = E.getNode()) + if (RefSCC *TargetRC = lookupRefSCC(*TargetN)) + TargetRC->Parents.erase(&RC); + // FIXME: This is a linear operation which could become hot and benefit from + // an index map. + auto LRI = find(LeafRefSCCs, &RC); + if (LRI != LeafRefSCCs.end()) + LeafRefSCCs.erase(LRI); + + auto RCIndexI = RefSCCIndices.find(&RC); + int RCIndex = RCIndexI->second; + PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex); + RefSCCIndices.erase(RCIndexI); + for (int i = RCIndex, Size = PostOrderRefSCCs.size(); i < Size; ++i) + RefSCCIndices[PostOrderRefSCCs[i]] = i; + + // Finally clear out all the data structures from the node down through the + // components. + N.clear(); + C.clear(); + RC.clear(); + + // Nothing to delete as all the objects are allocated in stable bump pointer + // allocators. 
+} + LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { return *new (MappedN = BPA.Allocate()) Node(*this, F); } @@ -1372,10 +1745,9 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) { // root DFS number. auto SCCNodes = make_range( PendingSCCStack.rbegin(), - std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(), - [RootDFSNumber](Node *N) { - return N->DFSNumber < RootDFSNumber; - })); + find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) { + return N->DFSNumber < RootDFSNumber; + })); // Form a new SCC out of these nodes and then clear them off our pending // stack. RC.SCCs.push_back(createSCC(RC, SCCNodes)); @@ -1411,19 +1783,19 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) { IsLeaf = false; } - // For the SCCs where we fine no child SCCs, add them to the leaf list. + // For the SCCs where we find no child SCCs, add them to the leaf list. if (IsLeaf) LeafRefSCCs.push_back(&RC); } -LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() { +bool LazyCallGraph::buildNextRefSCCInPostOrder() { if (DFSStack.empty()) { Node *N; do { // If we've handled all candidate entry nodes to the SCC forest, we're // done. if (RefSCCEntryNodes.empty()) - return nullptr; + return false; N = &get(*RefSCCEntryNodes.pop_back_val()); } while (N->DFSNumber != 0); @@ -1494,9 +1866,9 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() { // root DFS number. auto RefSCCNodes = node_stack_range( PendingRefSCCStack.rbegin(), - std::find_if( - PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(), - [RootDFSNumber](Node *N) { return N->DFSNumber < RootDFSNumber; })); + find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) { + return N->DFSNumber < RootDFSNumber; + })); // Form a new RefSCC out of these nodes and then clear them off our pending // stack. RefSCC *NewRC = createRefSCC(*this); @@ -1505,13 +1877,18 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() { PendingRefSCCStack.erase(RefSCCNodes.end().base(), PendingRefSCCStack.end()); - // We return the new node here. This essentially suspends the DFS walk - // until another RefSCC is requested. - return NewRC; + // Push the new node into the postorder list and return true indicating we + // successfully grew the postorder sequence by one. 
+ bool Inserted = + RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second; + (void)Inserted; + assert(Inserted && "Cannot already have this RefSCC in the index map!"); + PostOrderRefSCCs.push_back(NewRC); + return true; } } -char LazyCallGraphAnalysis::PassID; +AnalysisKey LazyCallGraphAnalysis::Key; LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 3ce667f..d442310 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" @@ -50,7 +51,7 @@ namespace llvm { FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); } } -char LazyValueAnalysis::PassID; +AnalysisKey LazyValueAnalysis::Key; //===----------------------------------------------------------------------===// // LVILatticeVal @@ -70,12 +71,14 @@ class LVILatticeVal { /// "nothing known yet". undefined, - /// This Value has a specific constant value. (For integers, constantrange - /// is used instead.) + /// This Value has a specific constant value. (For constant integers, + /// constantrange is used instead. Integer typed constantexprs can appear + /// as constant.) constant, - /// This Value is known to not have the specified value. (For integers, - /// constantrange is used instead.) + /// This Value is known to not have the specified value. (For constant + /// integers, constantrange is used instead. As above, integer typed + /// constantexprs can appear here.) notconstant, /// The Value falls within this range. (Used only for integer typed values.) @@ -139,37 +142,37 @@ public: return Range; } - /// Return true if this is a change in status. - bool markOverdefined() { +private: + void markOverdefined() { if (isOverdefined()) - return false; + return; Tag = overdefined; - return true; } - /// Return true if this is a change in status. - bool markConstant(Constant *V) { + void markConstant(Constant *V) { assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) - return markConstantRange(ConstantRange(CI->getValue())); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue())); + return; + } if (isa<UndefValue>(V)) - return false; + return; assert((!isConstant() || getConstant() == V) && "Marking constant with different value"); assert(isUndefined()); Tag = constant; Val = V; - return true; } - /// Return true if this is a change in status. - bool markNotConstant(Constant *V) { + void markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) - return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + return; + } if (isa<UndefValue>(V)) - return false; + return; assert((!isConstant() || getConstant() != V) && "Marking constant !constant with same value"); @@ -178,100 +181,70 @@ public: assert(isUndefined() || isConstant()); Tag = notconstant; Val = V; - return true; } - /// Return true if this is a change in status. 
- bool markConstantRange(ConstantRange NewR) { + void markConstantRange(ConstantRange NewR) { if (isConstantRange()) { if (NewR.isEmptySet()) - return markOverdefined(); - - bool changed = Range != NewR; - Range = std::move(NewR); - return changed; + markOverdefined(); + else { + Range = std::move(NewR); + } + return; } assert(isUndefined()); if (NewR.isEmptySet()) - return markOverdefined(); - - Tag = constantrange; - Range = std::move(NewR); - return true; + markOverdefined(); + else { + Tag = constantrange; + Range = std::move(NewR); + } } +public: + /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. - bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { - if (RHS.isUndefined() || isOverdefined()) return false; - if (RHS.isOverdefined()) return markOverdefined(); + void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { + if (RHS.isUndefined() || isOverdefined()) + return; + if (RHS.isOverdefined()) { + markOverdefined(); + return; + } if (isUndefined()) { - Tag = RHS.Tag; - Val = RHS.Val; - Range = RHS.Range; - return true; + *this = RHS; + return; } if (isConstant()) { - if (RHS.isConstant()) { - if (Val == RHS.Val) - return false; - return markOverdefined(); - } - - if (RHS.isNotConstant()) { - if (Val == RHS.Val) - return markOverdefined(); - - // Unless we can prove that the two Constants are different, we must - // move to overdefined. - if (ConstantInt *Res = - dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( - CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL))) - if (Res->isOne()) - return markNotConstant(RHS.getNotConstant()); - - return markOverdefined(); - } - - return markOverdefined(); + if (RHS.isConstant() && Val == RHS.Val) + return; + markOverdefined(); + return; } if (isNotConstant()) { - if (RHS.isConstant()) { - if (Val == RHS.Val) - return markOverdefined(); - - // Unless we can prove that the two Constants are different, we must - // move to overdefined. - if (ConstantInt *Res = - dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( - CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL))) - if (Res->isOne()) - return false; - - return markOverdefined(); - } - - if (RHS.isNotConstant()) { - if (Val == RHS.Val) - return false; - return markOverdefined(); - } - - return markOverdefined(); + if (RHS.isNotConstant() && Val == RHS.Val) + return; + markOverdefined(); + return; } assert(isConstantRange() && "New LVILattice type?"); - if (!RHS.isConstantRange()) - return markOverdefined(); - + if (!RHS.isConstantRange()) { + // We can get here if we've encountered a constantexpr of integer type + // and merge it with a constantrange. + markOverdefined(); + return; + } ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); if (NewR.isFullSet()) - return markOverdefined(); - return markConstantRange(NewR); + markOverdefined(); + else + markConstantRange(NewR); } }; @@ -366,6 +339,9 @@ namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; struct LVIValueHandle final : public CallbackVH { + // Needs to access getValPtr(), which is protected. + friend struct DenseMapInfo<LVIValueHandle>; + LazyValueInfoCache *Parent; LVIValueHandle(Value *V, LazyValueInfoCache *P) @@ -376,7 +352,7 @@ namespace { deleted(); } }; -} +} // end anonymous namespace namespace { /// This is the cache kept by LazyValueInfo which @@ -387,12 +363,15 @@ namespace { /// entries, allowing us to do a lookup with a binary search. 
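The mergeIn rewrite above drops the old changed/unchanged return value: a merge now only ever moves a value up the lattice toward overdefined, which is what guarantees the solver reaches a fixed point. A minimal sketch of that discipline on a toy integer lattice (deliberately not LLVM's LVILatticeVal):

struct TinyLattice {
  enum Kind { Undefined, Constant, Overdefined };
  Kind Tag;
  int Val; // Only meaningful when Tag == Constant.

  TinyLattice() : Tag(Undefined), Val(0) {}
  TinyLattice(Kind K, int V) : Tag(K), Val(V) {}

  static TinyLattice constant(int V) { return TinyLattice(Constant, V); }
  static TinyLattice overdefined() { return TinyLattice(Overdefined, 0); }

  // Merge RHS into this value; the result is never lower in the lattice than
  // either input, mirroring the void mergeIn above.
  void mergeIn(const TinyLattice &RHS) {
    if (RHS.Tag == Undefined || Tag == Overdefined)
      return;                       // Nothing new to learn.
    if (Tag == Undefined) {
      *this = RHS;                  // Adopt the incoming value wholesale.
      return;
    }
    if (Tag == Constant && RHS.Tag == Constant && Val == RHS.Val)
      return;                       // Agreeing constants stay constant.
    Tag = Overdefined;              // Any disagreement loses precision.
  }
};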
/// Over-defined lattice values are recorded in OverDefinedCache to reduce /// memory overhead. - typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> - ValueCacheEntryTy; + struct ValueCacheEntryTy { + ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {} + LVIValueHandle Handle; + SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals; + }; /// This is all of the cached information for all values, /// mapped from Value* to key information. - std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; + DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache; /// This tracks, on a per-block basis, the set of values that are /// over-defined at the end of that block. @@ -404,6 +383,183 @@ namespace { /// don't spend time removing unused blocks from our caches. DenseSet<AssertingVH<BasicBlock> > SeenBlocks; + public: + void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { + SeenBlocks.insert(BB); + + // Insert over-defined values into their own cache to reduce memory + // overhead. + if (Result.isOverdefined()) + OverDefinedCache[BB].insert(Val); + else { + auto It = ValueCache.find_as(Val); + if (It == ValueCache.end()) { + ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this); + It = ValueCache.find_as(Val); + assert(It != ValueCache.end() && "Val was just added to the map!"); + } + It->second->BlockVals[BB] = Result; + } + } + + bool isOverdefined(Value *V, BasicBlock *BB) const { + auto ODI = OverDefinedCache.find(BB); + + if (ODI == OverDefinedCache.end()) + return false; + + return ODI->second.count(V); + } + + bool hasCachedValueInfo(Value *V, BasicBlock *BB) const { + if (isOverdefined(V, BB)) + return true; + + auto I = ValueCache.find_as(V); + if (I == ValueCache.end()) + return false; + + return I->second->BlockVals.count(BB); + } + + LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const { + if (isOverdefined(V, BB)) + return LVILatticeVal::getOverdefined(); + + auto I = ValueCache.find_as(V); + if (I == ValueCache.end()) + return LVILatticeVal(); + auto BBI = I->second->BlockVals.find(BB); + if (BBI == I->second->BlockVals.end()) + return LVILatticeVal(); + return BBI->second; + } + + /// clear - Empty the cache. + void clear() { + SeenBlocks.clear(); + ValueCache.clear(); + OverDefinedCache.clear(); + } + + /// Inform the cache that a given value has been deleted. + void eraseValue(Value *V); + + /// This is part of the update interface to inform the cache + /// that a block has been deleted. + void eraseBlock(BasicBlock *BB); + + /// Updates the cache to remove any influence an overdefined value in + /// OldSucc might have (unless also overdefined in NewSucc). This just + /// flushes elements from the cache and does not add any. + void threadEdgeImpl(BasicBlock *OldSucc,BasicBlock *NewSucc); + + friend struct LVIValueHandle; + }; +} + +void LazyValueInfoCache::eraseValue(Value *V) { + SmallVector<AssertingVH<BasicBlock>, 4> ToErase; + for (auto &I : OverDefinedCache) { + SmallPtrSetImpl<Value *> &ValueSet = I.second; + ValueSet.erase(V); + if (ValueSet.empty()) + ToErase.push_back(I.first); + } + for (auto &BB : ToErase) + OverDefinedCache.erase(BB); + + ValueCache.erase(V); +} + +void LVIValueHandle::deleted() { + // This erasure deallocates *this, so it MUST happen after we're done + // using any and all members of *this. + Parent->eraseValue(*this); +} + +void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Shortcut if we have never seen this block. 
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); + if (I == SeenBlocks.end()) + return; + SeenBlocks.erase(I); + + auto ODI = OverDefinedCache.find(BB); + if (ODI != OverDefinedCache.end()) + OverDefinedCache.erase(ODI); + + for (auto &I : ValueCache) + I.second->BlockVals.erase(BB); +} + +void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, + BasicBlock *NewSucc) { + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be + // possible to solve now. We do NOT try to proactively update these values. + // Instead, we clear their entries from the cache, and allow lazy updating to + // recompute them when needed. + + // The updating process is fairly simple: we need to drop cached info + // for all values that were marked overdefined in OldSucc, and for those same + // values in any successor of OldSucc (except NewSucc) in which they were + // also marked overdefined. + std::vector<BasicBlock*> worklist; + worklist.push_back(OldSucc); + + auto I = OverDefinedCache.find(OldSucc); + if (I == OverDefinedCache.end()) + return; // Nothing to process here. + SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end()); + + // Use a worklist to perform a depth-first search of OldSucc's successors. + // NOTE: We do not need a visited list since any blocks we have already + // visited will have had their overdefined markers cleared already, and we + // thus won't loop to their successors. + while (!worklist.empty()) { + BasicBlock *ToUpdate = worklist.back(); + worklist.pop_back(); + + // Skip blocks only accessible through NewSucc. + if (ToUpdate == NewSucc) continue; + + // If a value was marked overdefined in OldSucc, and is here too... + auto OI = OverDefinedCache.find(ToUpdate); + if (OI == OverDefinedCache.end()) + continue; + SmallPtrSetImpl<Value *> &ValueSet = OI->second; + + bool changed = false; + for (Value *V : ValsToClear) { + if (!ValueSet.erase(V)) + continue; + + // If we removed anything, then we potentially need to update + // blocks successors too. + changed = true; + + if (ValueSet.empty()) { + OverDefinedCache.erase(OI); + break; + } + } + + if (!changed) continue; + + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); + } +} + +namespace { + // The actual implementation of the lazy analysis and update. Note that the + // inheritance from LazyValueInfoCache is intended to be temporary while + // splitting the code and then transitioning to a has-a relationship. + class LazyValueInfoImpl { + + /// Cached results from previous queries + LazyValueInfoCache TheCache; + /// This stack holds the state of the value solver during a query. /// It basically emulates the callstack of the naive /// recursive value lookup process. @@ -428,19 +584,6 @@ namespace { const DataLayout &DL; ///< A mandatory DataLayout DominatorTree *DT; ///< An optional DT pointer. - friend struct LVIValueHandle; - - void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { - SeenBlocks.insert(BB); - - // Insert over-defined values into their own cache to reduce memory - // overhead. 
- if (Result.isOverdefined()) - OverDefinedCache[BB].insert(Val); - else - lookup(Val)[BB] = Result; - } - LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, LVILatticeVal &Result, Instruction *CxtI = nullptr); @@ -450,6 +593,7 @@ namespace { // returned means that the work item was not completely processed and must // be revisited after going through the new items. bool solveBlockValue(Value *Val, BasicBlock *BB); + bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB); bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB); bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB); bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S, @@ -458,43 +602,12 @@ namespace { BasicBlock *BB); bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI, BasicBlock *BB); - void intersectAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV, + void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, + LVILatticeVal &BBLV, Instruction *BBI); void solve(); - ValueCacheEntryTy &lookup(Value *V) { - return ValueCache[LVIValueHandle(V, this)]; - } - - bool isOverdefined(Value *V, BasicBlock *BB) const { - auto ODI = OverDefinedCache.find(BB); - - if (ODI == OverDefinedCache.end()) - return false; - - return ODI->second.count(V); - } - - bool hasCachedValueInfo(Value *V, BasicBlock *BB) { - if (isOverdefined(V, BB)) - return true; - - LVIValueHandle ValHandle(V, this); - auto I = ValueCache.find(ValHandle); - if (I == ValueCache.end()) - return false; - - return I->second.count(BB); - } - - LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { - if (isOverdefined(V, BB)) - return LVILatticeVal::getOverdefined(); - - return lookup(V)[BB]; - } - public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. @@ -511,60 +624,28 @@ namespace { LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, Instruction *CxtI = nullptr); - /// This is the update interface to inform the cache that an edge from - /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. - void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + /// Complete flush all previously computed values + void clear() { + TheCache.clear(); + } /// This is part of the update interface to inform the cache /// that a block has been deleted. - void eraseBlock(BasicBlock *BB); - - /// clear - Empty the cache. - void clear() { - SeenBlocks.clear(); - ValueCache.clear(); - OverDefinedCache.clear(); + void eraseBlock(BasicBlock *BB) { + TheCache.eraseBlock(BB); } - LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL, + /// This is the update interface to inform the cache that an edge from + /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. 
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + + LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL, DominatorTree *DT = nullptr) : AC(AC), DL(DL), DT(DT) {} }; } // end anonymous namespace -void LVIValueHandle::deleted() { - SmallVector<AssertingVH<BasicBlock>, 4> ToErase; - for (auto &I : Parent->OverDefinedCache) { - SmallPtrSetImpl<Value *> &ValueSet = I.second; - if (ValueSet.count(getValPtr())) - ValueSet.erase(getValPtr()); - if (ValueSet.empty()) - ToErase.push_back(I.first); - } - for (auto &BB : ToErase) - Parent->OverDefinedCache.erase(BB); - - // This erasure deallocates *this, so it MUST happen after we're done - // using any and all members of *this. - Parent->ValueCache.erase(*this); -} - -void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { - // Shortcut if we have never seen this block. - DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); - if (I == SeenBlocks.end()) - return; - SeenBlocks.erase(I); - - auto ODI = OverDefinedCache.find(BB); - if (ODI != OverDefinedCache.end()) - OverDefinedCache.erase(ODI); - - for (auto &I : ValueCache) - I.second.erase(BB); -} - -void LazyValueInfoCache::solve() { +void LazyValueInfoImpl::solve() { while (!BlockValueStack.empty()) { std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); @@ -572,11 +653,11 @@ void LazyValueInfoCache::solve() { if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); - assert(hasCachedValueInfo(e.second, e.first) && + assert(TheCache.hasCachedValueInfo(e.second, e.first) && "Result should be in cache!"); DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName() - << " = " << getCachedValueInfo(e.second, e.first) << "\n"); + << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); BlockValueStack.pop(); BlockValueSet.erase(e); @@ -587,21 +668,20 @@ void LazyValueInfoCache::solve() { } } -bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { +bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) { // If already a constant, there is nothing to compute. if (isa<Constant>(Val)) return true; - return hasCachedValueInfo(Val, BB); + return TheCache.hasCachedValueInfo(Val, BB); } -LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { +LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) { // If already a constant, there is nothing to compute. 
if (Constant *VC = dyn_cast<Constant>(Val)) return LVILatticeVal::get(VC); - SeenBlocks.insert(BB); - return getCachedValueInfo(Val, BB); + return TheCache.getCachedValueInfo(Val, BB); } static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { @@ -610,7 +690,7 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { case Instruction::Load: case Instruction::Call: case Instruction::Invoke: - if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range)) + if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range)) if (isa<IntegerType>(BBI->getType())) { return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges)); } @@ -620,14 +700,14 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { return LVILatticeVal::getOverdefined(); } -bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { +bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { if (isa<Constant>(Val)) return true; - if (hasCachedValueInfo(Val, BB)) { + if (TheCache.hasCachedValueInfo(Val, BB)) { // If we have a cached value, use that. DEBUG(dbgs() << " reuse BB '" << BB->getName() - << "' val=" << getCachedValueInfo(Val, BB) << '\n'); + << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n'); // Since we're reusing a cached value, we don't need to update the // OverDefinedCache. The cache will have been properly updated whenever the @@ -638,28 +718,26 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { // Hold off inserting this value into the Cache in case we have to return // false and come back later. LVILatticeVal Res; + if (!solveBlockValueImpl(Res, Val, BB)) + // Work pushed, will revisit + return false; + + TheCache.insertResult(Val, BB, Res); + return true; +} + +bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, + Value *Val, BasicBlock *BB) { Instruction *BBI = dyn_cast<Instruction>(Val); - if (!BBI || BBI->getParent() != BB) { - if (!solveBlockValueNonLocal(Res, Val, BB)) - return false; - insertResult(Val, BB, Res); - return true; - } + if (!BBI || BBI->getParent() != BB) + return solveBlockValueNonLocal(Res, Val, BB); - if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - if (!solveBlockValuePHINode(Res, PN, BB)) - return false; - insertResult(Val, BB, Res); - return true; - } + if (PHINode *PN = dyn_cast<PHINode>(BBI)) + return solveBlockValuePHINode(Res, PN, BB); - if (auto *SI = dyn_cast<SelectInst>(BBI)) { - if (!solveBlockValueSelect(Res, SI, BB)) - return false; - insertResult(Val, BB, Res); - return true; - } + if (auto *SI = dyn_cast<SelectInst>(BBI)) + return solveBlockValueSelect(Res, SI, BB); // If this value is a nonnull pointer, record it's range and bailout. 
Note // that for all other pointer typed values, we terminate the search at the @@ -673,29 +751,20 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { PointerType *PT = dyn_cast<PointerType>(BBI->getType()); if (PT && isKnownNonNull(BBI)) { Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT)); - insertResult(Val, BB, Res); return true; } if (BBI->getType()->isIntegerTy()) { - if (isa<CastInst>(BBI)) { - if (!solveBlockValueCast(Res, BBI, BB)) - return false; - insertResult(Val, BB, Res); - return true; - } + if (isa<CastInst>(BBI)) + return solveBlockValueCast(Res, BBI, BB); + BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); - if (BO && isa<ConstantInt>(BO->getOperand(1))) { - if (!solveBlockValueBinaryOp(Res, BBI, BB)) - return false; - insertResult(Val, BB, Res); - return true; - } + if (BO && isa<ConstantInt>(BO->getOperand(1))) + return solveBlockValueBinaryOp(Res, BBI, BB); } DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - unknown inst def found.\n"); Res = getFromRangeMetadata(BBI); - insertResult(Val, BB, Res); return true; } @@ -748,7 +817,7 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) { return false; } -bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB) { LVILatticeVal Result; // Start Undefined. @@ -763,7 +832,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } else { - Result.markOverdefined(); + Result = LVILatticeVal::getOverdefined(); } BBLV = Result; return true; @@ -785,7 +854,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (Result.isOverdefined()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred (non local).\n"); - // Bofore giving up, see if we can prove the pointer non-null local to + // Before giving up, see if we can prove the pointer non-null local to // this particular block. if (Val->getType()->isPointerTy() && isObjectDereferencedInBlock(Val, BB)) { @@ -806,7 +875,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, return true; } -bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB) { LVILatticeVal Result; // Start Undefined. @@ -845,64 +914,70 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, return true; } -static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, - LVILatticeVal &Result, - bool isTrueDest = true); +static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, + bool isTrueDest = true); // If we can determine a constraint on the value given conditions assumed by // the program, intersect those constraints with BBLV -void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val, - LVILatticeVal &BBLV, - Instruction *BBI) { +void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( + Value *Val, LVILatticeVal &BBLV, Instruction *BBI) { BBI = BBI ? 
BBI : dyn_cast<Instruction>(Val); if (!BBI) return; - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC->assumptionsFor(Val)) { if (!AssumeVH) continue; auto *I = cast<CallInst>(AssumeVH); if (!isValidAssumeForContext(I, BBI, DT)) continue; - Value *C = I->getArgOperand(0); - if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) { - LVILatticeVal Result; - if (getValueFromFromCondition(Val, ICI, Result)) - BBLV = intersect(BBLV, Result); - } + BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); + } + + // If guards are not used in the module, don't spend time looking for them + auto *GuardDecl = BBI->getModule()->getFunction( + Intrinsic::getName(Intrinsic::experimental_guard)); + if (!GuardDecl || GuardDecl->use_empty()) + return; + + for (Instruction &I : make_range(BBI->getIterator().getReverse(), + BBI->getParent()->rend())) { + Value *Cond = nullptr; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) + BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); } } -bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed if (!hasBlockValue(SI->getTrueValue(), BB)) { if (pushBlockValue(std::make_pair(BB, SI->getTrueValue()))) return false; - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB); // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (TrueVal.isOverdefined()) { - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } if (!hasBlockValue(SI->getFalseValue(), BB)) { if (pushBlockValue(std::make_pair(BB, SI->getFalseValue()))) return false; - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB); // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (FalseVal.isOverdefined()) { - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } @@ -916,22 +991,22 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV, // ValueTracking getting smarter looking back past our immediate inputs.) 
if (SelectPatternResult::isMinOrMax(SPR.Flavor) && LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) { - switch (SPR.Flavor) { - default: - llvm_unreachable("unexpected minmax type!"); - case SPF_SMIN: /// Signed minimum - BBLV.markConstantRange(TrueCR.smin(FalseCR)); - return true; - case SPF_UMIN: /// Unsigned minimum - BBLV.markConstantRange(TrueCR.umin(FalseCR)); - return true; - case SPF_SMAX: /// Signed maximum - BBLV.markConstantRange(TrueCR.smax(FalseCR)); - return true; - case SPF_UMAX: /// Unsigned maximum - BBLV.markConstantRange(TrueCR.umax(FalseCR)); - return true; - }; + ConstantRange ResultCR = [&]() { + switch (SPR.Flavor) { + default: + llvm_unreachable("unexpected minmax type!"); + case SPF_SMIN: /// Signed minimum + return TrueCR.smin(FalseCR); + case SPF_UMIN: /// Unsigned minimum + return TrueCR.umin(FalseCR); + case SPF_SMAX: /// Signed maximum + return TrueCR.smax(FalseCR); + case SPF_UMAX: /// Unsigned maximum + return TrueCR.umax(FalseCR); + }; + }(); + BBLV = LVILatticeVal::getRange(ResultCR); + return true; } // TODO: ABS, NABS from the SelectPatternResult @@ -940,27 +1015,21 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV, // Can we constrain the facts about the true and false values by using the // condition itself? This shows up with idioms like e.g. select(a > 5, a, 5). // TODO: We could potentially refine an overdefined true value above. - if (auto *ICI = dyn_cast<ICmpInst>(SI->getCondition())) { - LVILatticeVal TrueValTaken, FalseValTaken; - if (!getValueFromFromCondition(SI->getTrueValue(), ICI, - TrueValTaken, true)) - TrueValTaken.markOverdefined(); - if (!getValueFromFromCondition(SI->getFalseValue(), ICI, - FalseValTaken, false)) - FalseValTaken.markOverdefined(); - - TrueVal = intersect(TrueVal, TrueValTaken); - FalseVal = intersect(FalseVal, FalseValTaken); - - - // Handle clamp idioms such as: - // %24 = constantrange<0, 17> - // %39 = icmp eq i32 %24, 0 - // %40 = add i32 %24, -1 - // %siv.next = select i1 %39, i32 16, i32 %40 - // %siv.next = constantrange<0, 17> not <-1, 17> - // In general, this can handle any clamp idiom which tests the edge - // condition via an equality or inequality. + Value *Cond = SI->getCondition(); + TrueVal = intersect(TrueVal, + getValueFromCondition(SI->getTrueValue(), Cond, true)); + FalseVal = intersect(FalseVal, + getValueFromCondition(SI->getFalseValue(), Cond, false)); + + // Handle clamp idioms such as: + // %24 = constantrange<0, 17> + // %39 = icmp eq i32 %24, 0 + // %40 = add i32 %24, -1 + // %siv.next = select i1 %39, i32 16, i32 %40 + // %siv.next = constantrange<0, 17> not <-1, 17> + // In general, this can handle any clamp idiom which tests the edge + // condition via an equality or inequality. + if (auto *ICI = dyn_cast<ICmpInst>(Cond)) { ICmpInst::Predicate Pred = ICI->getPredicate(); Value *A = ICI->getOperand(0); if (ConstantInt *CIBase = dyn_cast<ConstantInt>(ICI->getOperand(1))) { @@ -1001,13 +1070,13 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV, return true; } -bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI, BasicBlock *BB) { if (!BBI->getOperand(0)->getType()->isSized()) { // Without knowing how wide the input is, we can't analyze it in any useful // way. 
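In the min/max handling above, the old switch of markConstantRange calls becomes an immediately-invoked lambda so that a single ConstantRange result can be computed and then handed to LVILatticeVal::getRange once. The same C++ idiom with plain integers, purely for illustration:

#include <algorithm>

enum class Flavor { SMin, SMax };

// Initialize a const result from a switch by wrapping the switch in an
// immediately-invoked lambda, as the hunk above does for ConstantRange.
int selectBound(Flavor F, int TrueBound, int FalseBound) {
  const int Result = [&]() {
    switch (F) {
    case Flavor::SMin:
      return std::min(TrueBound, FalseBound);
    case Flavor::SMax:
      return std::max(TrueBound, FalseBound);
    }
    return TrueBound; // Unreachable with a well-formed Flavor value.
  }();
  return Result;
}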
- BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } @@ -1024,7 +1093,7 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV, // Unhandled instructions are overdefined. DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown cast).\n"); - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; } @@ -1041,7 +1110,8 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV, ConstantRange LHSRange = ConstantRange(OperandBitWidth); if (hasBlockValue(BBI->getOperand(0), BB)) { LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI); + intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, + BBI); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } @@ -1052,31 +1122,12 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV, // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - LVILatticeVal Result; - switch (BBI->getOpcode()) { - case Instruction::Trunc: - Result.markConstantRange(LHSRange.truncate(ResultBitWidth)); - break; - case Instruction::SExt: - Result.markConstantRange(LHSRange.signExtend(ResultBitWidth)); - break; - case Instruction::ZExt: - Result.markConstantRange(LHSRange.zeroExtend(ResultBitWidth)); - break; - case Instruction::BitCast: - Result.markConstantRange(LHSRange); - break; - default: - // Should be dead if the code above is correct - llvm_unreachable("inconsistent with above"); - break; - } - - BBLV = Result; + auto CastOp = (Instruction::CastOps) BBI->getOpcode(); + BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth)); return true; } -bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV, +bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI, BasicBlock *BB) { @@ -1101,7 +1152,7 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // Unhandled instructions are overdefined. DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown binary operator).\n"); - BBLV.markOverdefined(); + BBLV = LVILatticeVal::getOverdefined(); return true; }; @@ -1118,7 +1169,8 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV, ConstantRange LHSRange = ConstantRange(OperandBitWidth); if (hasBlockValue(BBI->getOperand(0), BB)) { LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI); + intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, + BBI); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } @@ -1129,82 +1181,114 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. 
- LVILatticeVal Result; - switch (BBI->getOpcode()) { - case Instruction::Add: - Result.markConstantRange(LHSRange.add(RHSRange)); - break; - case Instruction::Sub: - Result.markConstantRange(LHSRange.sub(RHSRange)); - break; - case Instruction::Mul: - Result.markConstantRange(LHSRange.multiply(RHSRange)); - break; - case Instruction::UDiv: - Result.markConstantRange(LHSRange.udiv(RHSRange)); - break; - case Instruction::Shl: - Result.markConstantRange(LHSRange.shl(RHSRange)); - break; - case Instruction::LShr: - Result.markConstantRange(LHSRange.lshr(RHSRange)); - break; - case Instruction::And: - Result.markConstantRange(LHSRange.binaryAnd(RHSRange)); - break; - case Instruction::Or: - Result.markConstantRange(LHSRange.binaryOr(RHSRange)); - break; - default: - // Should be dead if the code above is correct - llvm_unreachable("inconsistent with above"); - break; - } - - BBLV = Result; + auto BinOp = (Instruction::BinaryOps) BBI->getOpcode(); + BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange)); return true; } -bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, - LVILatticeVal &Result, bool isTrueDest) { - assert(ICI && "precondition"); - if (isa<Constant>(ICI->getOperand(1))) { - if (ICI->isEquality() && ICI->getOperand(0) == Val) { +static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI, + bool isTrueDest) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + CmpInst::Predicate Predicate = ICI->getPredicate(); + + if (isa<Constant>(RHS)) { + if (ICI->isEquality() && LHS == Val) { // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. - if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) - Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ)) + return LVILatticeVal::get(cast<Constant>(RHS)); else - Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); - return true; + return LVILatticeVal::getNot(cast<Constant>(RHS)); } + } - // Recognize the range checking idiom that InstCombine produces. - // (X-C1) u< C2 --> [C1, C1+C2) - ConstantInt *NegOffset = nullptr; - if (ICI->getPredicate() == ICmpInst::ICMP_ULT) - match(ICI->getOperand(0), m_Add(m_Specific(Val), - m_ConstantInt(NegOffset))); - - ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1)); - if (CI && (ICI->getOperand(0) == Val || NegOffset)) { - // Calculate the range of values that are allowed by the comparison - ConstantRange CmpRange(CI->getValue()); - ConstantRange TrueValues = - ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange); + if (!Val->getType()->isIntegerTy()) + return LVILatticeVal::getOverdefined(); + + // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible + // range of Val guaranteed by the condition. Recognize comparisons in the from + // of: + // icmp <pred> Val, ... + // icmp <pred> (add Val, Offset), ... + // The latter is the range checking idiom that InstCombine produces. Subtract + // the offset from the allowed range for RHS in this case. + + // Val or (add Val, Offset) can be on either hand of the comparison + if (LHS != Val && !match(LHS, m_Add(m_Specific(Val), m_ConstantInt()))) { + std::swap(LHS, RHS); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } - if (NegOffset) // Apply the offset from above. 
- TrueValues = TrueValues.subtract(NegOffset->getValue()); + ConstantInt *Offset = nullptr; + if (LHS != Val) + match(LHS, m_Add(m_Specific(Val), m_ConstantInt(Offset))); + + if (LHS == Val || Offset) { + // Calculate the range of values that are allowed by the comparison + ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), + /*isFullSet=*/true); + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) + RHSRange = ConstantRange(CI->getValue()); + else if (Instruction *I = dyn_cast<Instruction>(RHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + RHSRange = getConstantRangeFromMetadata(*Ranges); + + // If we're interested in the false dest, invert the condition + CmpInst::Predicate Pred = + isTrueDest ? Predicate : CmpInst::getInversePredicate(Predicate); + ConstantRange TrueValues = + ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); - // If we're interested in the false dest, invert the condition. - if (!isTrueDest) TrueValues = TrueValues.inverse(); + if (Offset) // Apply the offset from above. + TrueValues = TrueValues.subtract(Offset->getValue()); - Result = LVILatticeVal::getRange(std::move(TrueValues)); - return true; - } + return LVILatticeVal::getRange(std::move(TrueValues)); } - return false; + return LVILatticeVal::getOverdefined(); +} + +static LVILatticeVal +getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, + DenseMap<Value*, LVILatticeVal> &Visited); + +static LVILatticeVal +getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, + DenseMap<Value*, LVILatticeVal> &Visited) { + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) + return getValueFromICmpCondition(Val, ICI, isTrueDest); + + // Handle conditions in the form of (cond1 && cond2), we know that on the + // true dest path both of the conditions hold. + if (!isTrueDest) + return LVILatticeVal::getOverdefined(); + + BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond); + if (!BO || BO->getOpcode() != BinaryOperator::And) + return LVILatticeVal::getOverdefined(); + + auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited); + auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited); + return intersect(RHS, LHS); +} + +static LVILatticeVal +getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, + DenseMap<Value*, LVILatticeVal> &Visited) { + auto I = Visited.find(Cond); + if (I != Visited.end()) + return I->second; + + auto Result = getValueFromConditionImpl(Val, Cond, isTrueDest, Visited); + Visited[Cond] = Result; + return Result; +} + +LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) { + assert(Cond && "precondition"); + DenseMap<Value*, LVILatticeVal> Visited; + return getValueFromCondition(Val, Cond, isTrueDest, Visited); } /// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if @@ -1233,9 +1317,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if (getValueFromFromCondition(Val, ICI, Result, isTrueDest)) - return true; + Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest); + if (!Result.isOverdefined()) + return true; } } @@ -1267,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, /// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at /// the basic block if the edge does not constrain Val. 
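getValueFromCondition above threads a Visited map through the recursion so that a condition reachable through both operands of an 'and' chain is evaluated only once. A standalone sketch of that memoization pattern over a toy expression tree (Expr and evaluate are illustrative names, not LLVM APIs):

#include <unordered_map>

// Toy condition tree: either a leaf with a fixed truth value, or the AND of
// two sub-expressions that may be shared between parents.
struct Expr {
  bool IsLeaf;
  bool LeafValue;
  const Expr *LHS;
  const Expr *RHS;
};

static bool evaluate(const Expr &E,
                     std::unordered_map<const Expr *, bool> &Visited);

static bool evaluateImpl(const Expr &E,
                         std::unordered_map<const Expr *, bool> &Visited) {
  if (E.IsLeaf)
    return E.LeafValue;
  return evaluate(*E.LHS, Visited) && evaluate(*E.RHS, Visited);
}

static bool evaluate(const Expr &E,
                     std::unordered_map<const Expr *, bool> &Visited) {
  auto It = Visited.find(&E);
  if (It != Visited.end())
    return It->second;              // Each shared node is computed only once.
  bool Result = evaluateImpl(E, Visited);
  Visited[&E] = Result;             // Memoize before returning to the caller.
  return Result;
}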
-bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, +bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, LVILatticeVal &Result, Instruction *CxtI) { // If already a constant, there is nothing to compute. @@ -1280,7 +1364,7 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult)) // If we couldn't constrain the value on the edge, LocalResult doesn't // provide any information. - LocalResult.markOverdefined(); + LocalResult = LVILatticeVal::getOverdefined(); if (hasSingleValue(LocalResult)) { // Can't get any more precise here @@ -1298,39 +1382,40 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // Try to intersect ranges of the BB and the constraint on the edge. LVILatticeVal InBlock = getBlockValue(Val, BBFrom); - intersectAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator()); + intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, + BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction // the calling pass is trying to simplify) here, even though the result of // this function is generally cached when called from the solve* functions // (and that cached result might be used with queries using a different // context instruction), because when this function is called from the solve* // functions, the context instruction is not provided. When called from - // LazyValueInfoCache::getValueOnEdge, the context instruction is provided, + // LazyValueInfoImpl::getValueOnEdge, the context instruction is provided, // but then the result is not cached. - intersectAssumeBlockValueConstantRange(Val, InBlock, CxtI); + intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI); Result = intersect(LocalResult, InBlock); return true; } -LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, +LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); assert(BlockValueStack.empty() && BlockValueSet.empty()); if (!hasBlockValue(V, BB)) { - pushBlockValue(std::make_pair(BB, V)); + pushBlockValue(std::make_pair(BB, V)); solve(); } LVILatticeVal Result = getBlockValue(V, BB); - intersectAssumeBlockValueConstantRange(V, Result, CxtI); + intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } -LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { +LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting value " << *V << " at '" << CxtI->getName() << "'\n"); @@ -1340,13 +1425,13 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { LVILatticeVal Result = LVILatticeVal::getOverdefined(); if (auto *I = dyn_cast<Instruction>(V)) Result = getFromRangeMetadata(I); - intersectAssumeBlockValueConstantRange(V, Result, CxtI); + intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } -LVILatticeVal LazyValueInfoCache:: +LVILatticeVal LazyValueInfoImpl:: getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" @@ -1364,75 +1449,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, return Result; } -void 
LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, - BasicBlock *NewSucc) { - // When an edge in the graph has been threaded, values that we could not - // determine a value for before (i.e. were marked overdefined) may be - // possible to solve now. We do NOT try to proactively update these values. - // Instead, we clear their entries from the cache, and allow lazy updating to - // recompute them when needed. - - // The updating process is fairly simple: we need to drop cached info - // for all values that were marked overdefined in OldSucc, and for those same - // values in any successor of OldSucc (except NewSucc) in which they were - // also marked overdefined. - std::vector<BasicBlock*> worklist; - worklist.push_back(OldSucc); - - auto I = OverDefinedCache.find(OldSucc); - if (I == OverDefinedCache.end()) - return; // Nothing to process here. - SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end()); - - // Use a worklist to perform a depth-first search of OldSucc's successors. - // NOTE: We do not need a visited list since any blocks we have already - // visited will have had their overdefined markers cleared already, and we - // thus won't loop to their successors. - while (!worklist.empty()) { - BasicBlock *ToUpdate = worklist.back(); - worklist.pop_back(); - - // Skip blocks only accessible through NewSucc. - if (ToUpdate == NewSucc) continue; - - bool changed = false; - for (Value *V : ValsToClear) { - // If a value was marked overdefined in OldSucc, and is here too... - auto OI = OverDefinedCache.find(ToUpdate); - if (OI == OverDefinedCache.end()) - continue; - SmallPtrSetImpl<Value *> &ValueSet = OI->second; - if (!ValueSet.count(V)) - continue; - - ValueSet.erase(V); - if (ValueSet.empty()) - OverDefinedCache.erase(OI); - - // If we removed anything, then we potentially need to update - // blocks successors too. - changed = true; - } - - if (!changed) continue; - - worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); - } +void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + TheCache.threadEdgeImpl(OldSucc, NewSucc); } //===----------------------------------------------------------------------===// // LazyValueInfo Impl //===----------------------------------------------------------------------===// -/// This lazily constructs the LazyValueInfoCache. -static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC, - const DataLayout *DL, - DominatorTree *DT = nullptr) { +/// This lazily constructs the LazyValueInfoImpl. +static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC, + const DataLayout *DL, + DominatorTree *DT = nullptr) { if (!PImpl) { assert(DL && "getCache() called with a null DataLayout"); - PImpl = new LazyValueInfoCache(AC, *DL, DT); + PImpl = new LazyValueInfoImpl(AC, *DL, DT); } - return *static_cast<LazyValueInfoCache*>(PImpl); + return *static_cast<LazyValueInfoImpl*>(PImpl); } bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { @@ -1445,7 +1479,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (Info.PImpl) - getCache(Info.PImpl, Info.AC, &DL, Info.DT).clear(); + getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); // Fully lazy. return false; @@ -1464,7 +1498,7 @@ LazyValueInfo::~LazyValueInfo() { releaseMemory(); } void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. 
if (PImpl) { - delete &getCache(PImpl, AC, nullptr); + delete &getImpl(PImpl, AC, nullptr); PImpl = nullptr; } } @@ -1479,7 +1513,6 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) return LazyValueInfo(&AC, &TLI, DT); } - /// Returns true if we can statically tell that this value will never be a /// "useful" constant. In practice, this means we've got something like an /// alloca or a malloc call for which a comparison against a constant can @@ -1502,7 +1535,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1520,12 +1553,15 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, unsigned Width = V->getType()->getIntegerBitWidth(); const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); - assert(!Result.isConstant()); + getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isUndefined()) return ConstantRange(Width, /*isFullSet=*/false); if (Result.isConstantRange()) return Result.getConstantRange(); + // We represent ConstantInt constants as constant ranges but other kinds + // of integer constants, i.e. ConstantExpr will be tagged as constants + assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) && + "ConstantInt value must be represented as constantrange"); return ConstantRange(Width, /*isFullSet=*/true); } @@ -1536,7 +1572,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1583,8 +1619,8 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, } // Handle more complex predicates. 
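For the "more complex predicates" case, the hunk that follows switches to ConstantRange::makeExactICmpRegion and then tests containment both ways to decide True, False, or Unknown. A rough standalone illustration of that three-way decision with plain intervals (assumed types, not the LLVM API):

#include <climits>

enum class Tristate { False, True, Unknown };

struct Interval { long lo, hi; };  // closed interval [lo, hi]

static bool contains(Interval outer, Interval inner) {
  return outer.lo <= inner.lo && inner.hi <= outer.hi;
}

// Decide "v < c" when all we know about v is that it lies in `known`.
static Tristate predicateLessThan(Interval known, long c) {
  if (c == LONG_MIN)
    return Tristate::False;            // nothing is < LONG_MIN
  Interval trueRegion{LONG_MIN, c - 1};  // values for which the predicate holds
  Interval falseRegion{c, LONG_MAX};     // complement of the true region

  if (contains(trueRegion, known))
    return Tristate::True;             // every possible value satisfies it
  if (contains(falseRegion, known))
    return Tristate::False;            // no possible value satisfies it
  return Tristate::Unknown;            // the known range straddles the boundary
}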
- ConstantRange TrueValues = - ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + (ICmpInst::Predicate)Pred, CI->getValue()); if (TrueValues.contains(CR)) return LazyValueInfo::True; if (TrueValues.inverse().contains(CR)) @@ -1624,7 +1660,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } @@ -1644,7 +1680,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, return LazyValueInfo::True; } const DataLayout &DL = CxtI->getModule()->getDataLayout(); - LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI); + LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI); Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI); if (Ret != Unknown) return Ret; @@ -1703,7 +1739,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, } if (Baseline != Unknown) return Baseline; - } + } // For a comparison where the V is outside this block, it's possible // that we've branched on it before. Look to see if the value is known @@ -1734,13 +1770,13 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { if (PImpl) { const DataLayout &DL = PredBB->getModule()->getDataLayout(); - getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); } } void LazyValueInfo::eraseBlock(BasicBlock *BB) { if (PImpl) { const DataLayout &DL = BB->getModule()->getDataLayout(); - getCache(PImpl, AC, &DL, DT).eraseBlock(BB); + getImpl(PImpl, AC, &DL, DT).eraseBlock(BB); } } diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index fdf5f55..2ca46b1 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -35,27 +35,48 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Lint.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" 
+#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <string> + using namespace llvm; namespace { @@ -64,7 +85,7 @@ namespace { static const unsigned Write = 2; static const unsigned Callee = 4; static const unsigned Branchee = 8; - } + } // end namespace MemRef class Lint : public FunctionPass, public InstVisitor<Lint> { friend class InstVisitor<Lint>; @@ -159,7 +180,7 @@ namespace { WriteValues({V1, Vs...}); } }; -} +} // end anonymous namespace char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", @@ -173,7 +194,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", // Assert - We know that cond should be true, if not print an error message. #define Assert(C, ...) \ - do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0) + do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) // Lint::run - This is the main Analysis entry point for a // function. @@ -680,9 +701,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) return findValueImpl(W, OffsetOk, Visited); - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI)) - if (W != V) + } else if (auto *C = dyn_cast<Constant>(V)) { + if (Value *W = ConstantFoldConstant(C, *DL, TLI)) + if (W && W != V) return findValueImpl(W, OffsetOk, Visited); } diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 75426b5..e46541e 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -55,6 +55,10 @@ static bool isDereferenceableAndAlignedPointer( const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl<const Value *> &Visited) { + // Already visited? Bail out, we've likely hit unreachable code. + if (!Visited.insert(V).second) + return false; + // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. @@ -87,9 +91,11 @@ static bool isDereferenceableAndAlignedPointer( // then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also // aligned to Align bytes. - return Visited.insert(Base).second && - isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL, - CtxI, DT, Visited); + // Offset and Size may have different bit widths if we have visited an + // addrspacecast, so we can't do arithmetic directly on the APInt values. 
+ return isDereferenceableAndAlignedPointer( + Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), + DL, CtxI, DT, Visited); } // For gc.relocate, look through relocations @@ -302,11 +308,11 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden, "to scan backward from a given instruction, when searching for " "available loaded value")); -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, + BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, AAMDNodes *AATags, - bool *IsLoadCSE) { + AliasAnalysis *AA, bool *IsLoadCSE) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; @@ -356,8 +362,6 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, if (LI->isAtomic() < Load->isAtomic()) return nullptr; - if (AATags) - LI->getAAMetadata(*AATags); if (IsLoadCSE) *IsLoadCSE = true; return LI; @@ -377,8 +381,8 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, if (SI->isAtomic() < Load->isAtomic()) return nullptr; - if (AATags) - SI->getAAMetadata(*AATags); + if (IsLoadCSE) + *IsLoadCSE = false; return SI->getOperand(0); } diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 5214eb7..bf80072 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -13,18 +13,60 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPassManager.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE 
"loop-accesses" @@ -94,14 +136,18 @@ bool VectorizerParams::isInterleaveForced() { } void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, - const Function *TheFunction, - const Loop *TheLoop, - const char *PassName) { + const Loop *TheLoop, const char *PassName, + OptimizationRemarkEmitter &ORE) { DebugLoc DL = TheLoop->getStartLoc(); - if (const Instruction *I = Message.getInstr()) - DL = I->getDebugLoc(); - emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName, - *TheFunction, DL, Message.str()); + const Value *V = TheLoop->getHeader(); + if (const Instruction *I = Message.getInstr()) { + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + V = I->getParent(); + } + ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str()); } Value *llvm::stripIntegerCast(Value *V) { @@ -463,6 +509,7 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { } namespace { + /// \brief Analyses memory accesses in a loop. /// /// Checks whether run time pointer checks are needed and builds sets for data @@ -886,7 +933,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, /// \brief Check whether the access through \p Ptr has a constant stride. int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, - bool Assume) { + bool Assume, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -925,9 +972,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = - PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || - isNoWrapAddRec(Ptr, AR, PSE, Lp); + bool IsNoWrapAddRec = !ShouldCheckWrap || + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { if (Assume) { @@ -1028,8 +1075,8 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, return false; // Make sure that A and B have the same type if required. - if(CheckType && PtrA->getType() != PtrB->getType()) - return false; + if (CheckType && PtrA->getType() != PtrB->getType()) + return false; unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); @@ -1451,7 +1498,7 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const { auto &IndexVector = Accesses.find(Access)->second; SmallVector<Instruction *, 4> Insts; - std::transform(IndexVector.begin(), IndexVector.end(), + transform(IndexVector, std::back_inserter(Insts), [&](unsigned Idx) { return this->InstMap[Idx]; }); return Insts; @@ -1478,25 +1525,23 @@ bool LoopAccessInfo::canAnalyzeLoop() { // We can only analyze innermost loops. if (!TheLoop->empty()) { DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); - emitAnalysis(LoopAccessReport() << "loop is not the innermost loop"); + recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; return false; } // We must have a single backedge. 
if (TheLoop->getNumBackEdges() != 1) { DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); + recordAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by analyzer"; return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); + recordAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by analyzer"; return false; } @@ -1505,17 +1550,16 @@ bool LoopAccessInfo::canAnalyzeLoop() { // instructions in the loop are executed the same number of times. if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); - emitAnalysis( - LoopAccessReport() << - "loop control flow is not understood by analyzer"); + recordAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by analyzer"; return false; } // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = PSE->getBackedgeTakenCount(); if (ExitCount == PSE->getSE()->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() - << "could not determine number of loop iterations"); + recordAnalysis("CantComputeNumberOfIterations") + << "could not determine number of loop iterations"; DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1564,8 +1608,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, auto *Ld = dyn_cast<LoadInst>(&I); if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { - emitAnalysis(LoopAccessReport(Ld) - << "read with atomic ordering or volatile read"); + recordAnalysis("NonSimpleLoad", Ld) + << "read with atomic ordering or volatile read"; DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); CanVecMem = false; return; @@ -1582,14 +1626,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, if (I.mayWriteToMemory()) { auto *St = dyn_cast<StoreInst>(&I); if (!St) { - emitAnalysis(LoopAccessReport(St) - << "instruction cannot be vectorized"); + recordAnalysis("CantVectorizeInstruction", St) + << "instruction cannot be vectorized"; CanVecMem = false; return; } if (!St->isSimple() && !IsAnnotatedParallel) { - emitAnalysis(LoopAccessReport(St) - << "write with atomic ordering or volatile write"); + recordAnalysis("NonSimpleStore", St) + << "write with atomic ordering or volatile write"; DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); CanVecMem = false; return; @@ -1697,7 +1741,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides); if (!CanDoRTIfNeeded) { - emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); + recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); CanVecMem = false; @@ -1728,8 +1772,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, // Check that we found the bounds for the pointer. 
if (!CanDoRTIfNeeded) { - emitAnalysis(LoopAccessReport() - << "cannot check memory dependencies at runtime"); + recordAnalysis("CantCheckMemDepsAtRunTime") + << "cannot check memory dependencies at runtime"; DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); CanVecMem = false; return; @@ -1744,12 +1788,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); else { - emitAnalysis( - LoopAccessReport() + recordAnalysis("UnsafeMemDep") << "unsafe dependent memory operations in loop. Use " "#pragma loop distribute(enable) to allow loop distribution " "to attempt to isolate the offending operations into a separate " - "loop"); + "loop"; DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); } } @@ -1763,13 +1806,35 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, return !DT->dominates(BB, Latch); } -void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { +OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, + Instruction *I) { assert(!Report && "Multiple reports generated"); - Report = Message; + + Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, + CodeRegion); + return *Report; } bool LoopAccessInfo::isUniform(Value *V) const { - return (PSE->getSE()->isLoopInvariant(PSE->getSE()->getSCEV(V), TheLoop)); + auto *SE = PSE->getSE(); + // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is + // never considered uniform. + // TODO: Is this really what we want? Even without FP SCEV, we may want some + // trivially loop-invariant FP values to be considered uniform. + if (!SE->isSCEVable(V->getType())) + return false; + return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1784,6 +1849,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, } namespace { + /// \brief IR Values for the lower and upper bounds of a pointer evolution. We /// need to use value-handles because SCEV expansion can invalidate previously /// expanded values. Thus expansion of a pointer can invalidate the bounds for @@ -1792,6 +1858,7 @@ struct PointerBounds { TrackingVH<Value> Start; TrackingVH<Value> End; }; + } // end anonymous namespace /// \brief Expand code for the lower and upper bound of the pointer group \p CG @@ -1803,18 +1870,24 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + if (SE->isLoopInvariant(Sc, TheLoop)) { DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr << "\n"); - return {Ptr, Ptr}; + // Ptr could be in the loop body. If so, expand a new one at the correct + // location. + Instruction *Inst = dyn_cast<Instruction>(Ptr); + Value *NewPtr = (Inst && TheLoop->contains(Inst)) + ? 
Exp.expandCodeFor(Sc, PtrArithTy, Loc) + : Ptr; + return {NewPtr, NewPtr}; } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - LLVMContext &Ctx = Loc->getContext(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); Value *Start = nullptr, *End = nullptr; - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); @@ -1833,9 +1906,8 @@ static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds( // Here we're relying on the SCEV Expander's cache to only emit code for the // same bounds once. - std::transform( - PointerChecks.begin(), PointerChecks.end(), - std::back_inserter(ChecksWithBounds), + transform( + PointerChecks, std::back_inserter(ChecksWithBounds), [&](const RuntimePointerChecking::PointerCheck &Check) { PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), @@ -1967,7 +2039,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { } if (Report) - OS.indent(Depth) << "Report: " << Report->str() << "\n"; + OS.indent(Depth) << "Report: " << Report->getMsg() << "\n"; if (auto *Dependences = DepChecker->getDependences()) { OS.indent(Depth) << "Dependences:\n"; @@ -2046,41 +2118,17 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) -char LoopAccessAnalysis::PassID; - -LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) { - const AnalysisManager<Function> &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function &F = *L.getHeader()->getParent(); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F); - auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AA = FAM.getCachedResult<AAManager>(F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(F); - if (!SE) - report_fatal_error( - "ScalarEvolution must have been cached at a higher level"); - if (!AA) - report_fatal_error("AliasAnalysis must have been cached at a higher level"); - if (!DT) - report_fatal_error("DominatorTree must have been cached at a higher level"); - if (!LI) - report_fatal_error("LoopInfo must have been cached at a higher level"); - return LoopAccessInfo(&L, SE, TLI, AA, DT, LI); -} +AnalysisKey LoopAccessAnalysis::Key; -PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L, - AnalysisManager<Loop> &AM) { - Function &F = *L.getHeader()->getParent(); - auto &LAI = AM.getResult<LoopAccessAnalysis>(L); - OS << "Loop access info in function '" << F.getName() << "':\n"; - OS.indent(2) << L.getHeader()->getName() << ":\n"; - LAI.print(OS, 4); - return PreservedAnalyses::all(); +LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI); } namespace llvm { + Pass *createLAAPass() { return new LoopAccessLegacyAnalysis(); } -} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp new file mode 100644 index 0000000..5be3ee3 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -0,0 +1,160 @@ +//===- LoopAnalysisManager.cpp - Loop analysis management -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file 
is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/Dominators.h" + +using namespace llvm; + +// Explicit template instantiations and specialization defininitions for core +// template typedefs. +namespace llvm { +template class AllAnalysesOn<Loop>; +template class AnalysisManager<Loop, LoopStandardAnalysisResults &>; +template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; +template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop, + LoopStandardAnalysisResults &>; + +bool LoopAnalysisManagerFunctionProxy::Result::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // First compute the sequence of IR units covered by this proxy. We will want + // to visit this in postorder, but because this is a tree structure we can do + // this by building a preorder sequence and walking it in reverse. + SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; + // Note that we want to walk the roots in reverse order because we will end + // up reversing the preorder sequence. However, it happens that the loop nest + // roots are in reverse order within the LoopInfo object. So we just walk + // forward here. + // FIXME: If we change the order of LoopInfo we will want to add a reverse + // here. + for (Loop *RootL : *LI) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + Loop *L = PreOrderWorklist.pop_back_val(); + PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + // If this proxy or the loop info is going to be invalidated, we also need + // to clear all the keys coming from that analysis. We also completely blow + // away the loop analyses if any of the standard analyses provided by the + // loop pass manager go away so that loop analyses can freely use these + // without worrying about declaring dependencies on them etc. + // FIXME: It isn't clear if this is the right tradeoff. We could instead make + // loop analyses declare any dependencies on these and use the more general + // invalidation logic below to act on that. + auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); + if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || + Inv.invalidate<AAManager>(F, PA) || + Inv.invalidate<AssumptionAnalysis>(F, PA) || + Inv.invalidate<DominatorTreeAnalysis>(F, PA) || + Inv.invalidate<LoopAnalysis>(F, PA) || + Inv.invalidate<ScalarEvolutionAnalysis>(F, PA)) { + // Note that the LoopInfo may be stale at this point, however the loop + // objects themselves remain the only viable keys that could be in the + // analysis manager's cache. So we just walk the keys and forcibly clear + // those results. Note that the order doesn't matter here as this will just + // directly destroy the results without calling methods on them. + for (Loop *L : PreOrderLoops) + InnerAM->clear(*L); + + // We also need to null out the inner AM so that when the object gets + // destroyed as invalid we don't try to clear the inner AM again. 
At that + // point we won't be able to reliably walk the loops for this function and + // only clear results associated with those loops the way we do here. + // FIXME: Making InnerAM null at this point isn't very nice. Most analyses + // try to remain valid during invalidation. Maybe we should add an + // `IsClean` flag? + InnerAM = nullptr; + + // Now return true to indicate this *is* invalid and a fresh proxy result + // needs to be built. This is especially important given the null InnerAM. + return true; + } + + // Directly check if the relevant set is preserved so we can short circuit + // invalidating loops. + bool AreLoopAnalysesPreserved = + PA.allAnalysesInSetPreserved<AllAnalysesOn<Loop>>(); + + // Since we have a valid LoopInfo we can actually leave the cached results in + // the analysis manager associated with the Loop keys, but we need to + // propagate any necessary invalidation logic into them. We'd like to + // invalidate things in roughly the same order as they were put into the + // cache and so we walk the preorder list in reverse to form a valid + // postorder. + for (Loop *L : reverse(PreOrderLoops)) { + Optional<PreservedAnalyses> InnerPA; + + // Check to see whether the preserved set needs to be adjusted based on + // function-level analysis invalidation triggering deferred invalidation + // for this loop. + if (auto *OuterProxy = + InnerAM->getCachedResult<FunctionAnalysisManagerLoopProxy>(*L)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, F, PA)) { + if (!InnerPA) + InnerPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + InnerPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set. If so we'll need to run the inner + // invalidation. + if (InnerPA) { + InnerAM->invalidate(*L, *InnerPA); + continue; + } + + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all Loop analyses. + if (!AreLoopAnalysesPreserved) + InnerAM->invalidate(*L, PA); + } + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +template <> +LoopAnalysisManagerFunctionProxy::Result +LoopAnalysisManagerFunctionProxy::run(Function &F, + FunctionAnalysisManager &AM) { + return Result(*InnerAM, AM.getResult<LoopAnalysis>(F)); +} +} + +PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { + PreservedAnalyses PA; + PA.preserve<AssumptionAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<LoopAnalysisManagerFunctionProxy>(); + PA.preserve<ScalarEvolutionAnalysis>(); + // TODO: What we really want to do here is preserve an AA category, but that + // concept doesn't exist yet. 
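The proxy invalidation above needs the loops in postorder but collects them with a simple preorder worklist and then walks the list in reverse. A minimal standalone version of that trick over a generic tree of nodes (illustrative types only, not LoopInfo):

#include <vector>

struct Node {
  std::vector<Node *> children;  // sub-loops in the real code
};

// Collect a preorder sequence with an explicit worklist. Reversing it yields a
// valid postorder, because every node precedes its descendants in preorder.
std::vector<Node *> preorder(const std::vector<Node *> &roots) {
  std::vector<Node *> order, worklist;
  for (Node *root : roots) {
    worklist.push_back(root);
    while (!worklist.empty()) {
      Node *n = worklist.back();
      worklist.pop_back();
      worklist.insert(worklist.end(), n->children.begin(), n->children.end());
      order.push_back(n);
    }
  }
  return order;
}

void visitPostorder(const std::vector<Node *> &roots) {
  std::vector<Node *> pre = preorder(roots);
  for (auto it = pre.rbegin(); it != pre.rend(); ++it) {
    Node *n = *it;  // all of n's descendants have already been visited
    (void)n;        // ... clear or invalidate the cached results for n here
  }
}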
+ PA.preserve<AAManager>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + return PA; +} diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 30f7ef3..f449ce9 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -143,42 +143,47 @@ PHINode *Loop::getCanonicalInductionVariable() const { return nullptr; } -bool Loop::isLCSSAForm(DominatorTree &DT) const { - for (BasicBlock *BB : this->blocks()) { - for (Instruction &I : *BB) { - // Tokens can't be used in PHI nodes and live-out tokens prevent loop - // optimizations, so for the purposes of considered LCSSA form, we - // can ignore them. - if (I.getType()->isTokenTy()) - continue; +// Check that 'BB' doesn't have any uses outside of the 'L' +static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, + DominatorTree &DT) { + for (const Instruction &I : BB) { + // Tokens can't be used in PHI nodes and live-out tokens prevent loop + // optimizations, so for the purposes of considered LCSSA form, we + // can ignore them. + if (I.getType()->isTokenTy()) + continue; - for (Use &U : I.uses()) { - Instruction *UI = cast<Instruction>(U.getUser()); - BasicBlock *UserBB = UI->getParent(); - if (PHINode *P = dyn_cast<PHINode>(UI)) - UserBB = P->getIncomingBlock(U); - - // Check the current block, as a fast-path, before checking whether - // the use is anywhere in the loop. Most values are used in the same - // block they are defined in. Also, blocks not reachable from the - // entry are special; uses in them don't need to go through PHIs. - if (UserBB != BB && - !contains(UserBB) && - DT.isReachableFromEntry(UserBB)) - return false; - } + for (const Use &U : I.uses()) { + const Instruction *UI = cast<Instruction>(U.getUser()); + const BasicBlock *UserBB = UI->getParent(); + if (const PHINode *P = dyn_cast<PHINode>(UI)) + UserBB = P->getIncomingBlock(U); + + // Check the current block, as a fast-path, before checking whether + // the use is anywhere in the loop. Most values are used in the same + // block they are defined in. Also, blocks not reachable from the + // entry are special; uses in them don't need to go through PHIs. + if (UserBB != &BB && !L.contains(UserBB) && + DT.isReachableFromEntry(UserBB)) + return false; } } - return true; } -bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { - if (!isLCSSAForm(DT)) - return false; +bool Loop::isLCSSAForm(DominatorTree &DT) const { + // For each block we check that it doesn't have any uses outside of this loop. + return all_of(this->blocks(), [&](const BasicBlock *BB) { + return isBlockInLCSSAForm(*this, *BB, DT); + }); +} - return std::all_of(begin(), end(), [&](const Loop *L) { - return L->isRecursivelyLCSSAForm(DT); +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const { + // For each block we check that it doesn't have any uses outside of its + // innermost loop. This process will transitively guarantee that the current + // loop and all of the nested loops are in LCSSA form. + return all_of(this->blocks(), [&](const BasicBlock *BB) { + return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT); }); } @@ -300,23 +305,40 @@ bool Loop::isAnnotatedParallel() const { } DebugLoc Loop::getStartLoc() const { + return getLocRange().getStart(); +} + +Loop::LocRange Loop::getLocRange() const { // If we have a debug location in the loop ID, then use it. 
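The getLocRange change implemented in the hunk that follows treats the first debug location found in the loop ID metadata as the start of the range and a second one, if present, as the end. In isolation the scan boils down to something like this (standalone sketch; Loc and LocRange are stand-ins, not the LLVM classes):

#include <utility>
#include <vector>

struct Loc { int line = 0; bool valid = false; };
using LocRange = std::pair<Loc, Loc>;  // {start, end}

// Walk the candidate locations in order: the first valid one starts the
// range, the second valid one (if any) ends it; otherwise end == start.
LocRange getLocRange(const std::vector<Loc> &candidates) {
  Loc start;
  for (const Loc &l : candidates) {
    if (!l.valid)
      continue;
    if (!start.valid)
      start = l;
    else
      return {start, l};  // found both endpoints
  }
  return {start, start};  // zero or one location found
}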
- if (MDNode *LoopID = getLoopID()) - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) - if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i))) - return DebugLoc(L); + if (MDNode *LoopID = getLoopID()) { + DebugLoc Start; + // We use the first DebugLoc in the header as the start location of the loop + // and if there is a second DebugLoc in the header we use it as end location + // of the loop. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i))) { + if (!Start) + Start = DebugLoc(L); + else + return LocRange(Start, DebugLoc(L)); + } + } + + if (Start) + return LocRange(Start); + } // Try the pre-header first. if (BasicBlock *PHeadBB = getLoopPreheader()) if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc()) - return DL; + return LocRange(DL); // If we have no pre-header or there are no instructions with debug // info in it, try the header. if (BasicBlock *HeadBB = getHeader()) - return HeadBB->getTerminator()->getDebugLoc(); + return LocRange(HeadBB->getTerminator()->getDebugLoc()); - return DebugLoc(); + return LocRange(); } bool Loop::hasDedicatedExits() const { @@ -366,8 +388,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { // In case of multiple edges from current block to exit block, collect // only one edge in ExitBlocks. Use switchExitBlocks to keep track of // duplicate edges. - if (std::find(SwitchExitBlocks.begin(), SwitchExitBlocks.end(), Successor) - == SwitchExitBlocks.end()) { + if (!is_contained(SwitchExitBlocks, Successor)) { SwitchExitBlocks.push_back(Successor); ExitBlocks.push_back(Successor); } @@ -387,6 +408,10 @@ BasicBlock *Loop::getUniqueExitBlock() const { LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } + +LLVM_DUMP_METHOD void Loop::dumpVerbose() const { + print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true); +} #endif //===----------------------------------------------------------------------===// @@ -532,8 +557,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { assert(Subloop && "subloop is not an ancestor of the original loop"); } // Get the current nearest parent of the Subloop exits, initially Unloop. - NearLoop = - SubloopParents.insert(std::make_pair(Subloop, &Unloop)).first->second; + NearLoop = SubloopParents.insert({Subloop, &Unloop}).first->second; } succ_iterator I = succ_begin(BB), E = succ_end(BB); @@ -645,9 +669,9 @@ void LoopInfo::markAsRemoved(Loop *Unloop) { } } -char LoopAnalysis::PassID; +AnalysisKey LoopAnalysis::Key; -LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) { +LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) { // FIXME: Currently we create a LoopInfo from scratch for every function. // This may prove to be too wasteful due to deallocating and re-allocating // memory each time for the underlying map and vector datastructures. 
At some @@ -660,23 +684,18 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) { } PreservedAnalyses LoopPrinterPass::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { AM.getResult<LoopAnalysis>(F).print(OS); return PreservedAnalyses::all(); } -PrintLoopPass::PrintLoopPass() : OS(dbgs()) {} -PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner) - : OS(OS), Banner(Banner) {} - -PreservedAnalyses PrintLoopPass::run(Loop &L, AnalysisManager<Loop> &) { +void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) { OS << Banner; for (auto *Block : L.blocks()) if (Block) Block->print(OS); else OS << "Printing <null> block"; - return PreservedAnalyses::all(); } //===----------------------------------------------------------------------===// @@ -702,8 +721,10 @@ void LoopInfoWrapperPass::verifyAnalysis() const { // -verify-loop-info option can enable this. In order to perform some // checking by default, LoopPass has been taught to call verifyLoop manually // during loop pass sequences. - if (VerifyLoopInfo) - LI.verify(); + if (VerifyLoopInfo) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI.verify(DT); + } } void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { @@ -715,6 +736,14 @@ void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const { LI.print(OS); } +PreservedAnalyses LoopVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + LoopInfo &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + LI.verify(DT); + return PreservedAnalyses::all(); +} + //===----------------------------------------------------------------------===// // LoopBlocksDFS implementation // diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 222345c..3f4a079 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/OptBisect.h" @@ -31,13 +32,14 @@ namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. /// class PrintLoopPassWrapper : public LoopPass { - PrintLoopPass P; + raw_ostream &OS; + std::string Banner; public: static char ID; - PrintLoopPassWrapper() : LoopPass(ID) {} + PrintLoopPassWrapper() : LoopPass(ID), OS(dbgs()) {} PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner) - : LoopPass(ID), P(OS, Banner) {} + : LoopPass(ID), OS(OS), Banner(Banner) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -48,8 +50,7 @@ public: [](BasicBlock *BB) { return BB; }); if (BBI != L->blocks().end() && isFunctionInPrintList((*BBI)->getParent()->getName())) { - AnalysisManager<Loop> DummyLAM; - P.run(*L, DummyLAM); + printLoop(*L, OS, Banner); } return false; } @@ -131,8 +132,8 @@ void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) { // Recurse through all subloops and all loops into LQ. 
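The queue-population routine defined just after this comment pushes a loop and then recurses into its sub-loops in reverse order, which the patch expresses with the reverse() range adaptor. Stripped down to plain C++ it is just (hypothetical Loop type, not LLVM's):

#include <deque>
#include <vector>

struct Loop { std::vector<Loop *> subloops; };

// Push L first, then recurse into its sub-loops back to front, mirroring the
// reverse(*L) iteration in the patch.
static void addLoopIntoQueue(Loop *l, std::deque<Loop *> &queue) {
  queue.push_back(l);
  for (auto it = l->subloops.rbegin(); it != l->subloops.rend(); ++it)
    addLoopIntoQueue(*it, queue);
}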
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { LQ.push_back(L); - for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) - addLoopIntoQueue(*I, LQ); + for (Loop *I : reverse(*L)) + addLoopIntoQueue(I, LQ); } /// Pass Manager itself does not invalidate any analysis info. @@ -140,6 +141,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { // LPPassManager needs LoopInfo. In the long term LoopInfo class will // become part of LPPassManager. Info.addRequired<LoopInfoWrapperPass>(); + Info.addRequired<DominatorTreeWrapperPass>(); Info.setPreservesAll(); } @@ -148,6 +150,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { auto &LIWP = getAnalysis<LoopInfoWrapperPass>(); LI = &LIWP.getLoopInfo(); + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); bool Changed = false; // Collect inherited analysis from Module level pass manager. @@ -162,16 +165,14 @@ bool LPPassManager::runOnFunction(Function &F) { // Note that LoopInfo::iterator visits loops in reverse program // order. Here, reverse_iterator gives us a forward order, and the LoopQueue // reverses the order a third time by popping from the back. - for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) - addLoopIntoQueue(*I, LQ); + for (Loop *L : reverse(*LI)) + addLoopIntoQueue(L, LQ); if (LQ.empty()) // No loops, skip calling finalizers return false; // Initialization - for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); - I != E; ++I) { - Loop *L = *I; + for (Loop *L : LQ) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); Changed |= P->doInitialization(L, *this); @@ -220,6 +221,12 @@ bool LPPassManager::runOnFunction(Function &F) { TimeRegion PassTimer(getPassTimer(&LIWP)); CurrentLoop->verifyLoop(); } + // Here we apply same reasoning as in the above case. Only difference + // is that LPPassManager might run passes which do not require LCSSA + // form (LoopPassPrinter for example). We should skip verification for + // such passes. + if (mustPreserveAnalysisID(LCSSAVerificationPass::ID)) + CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI); // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); @@ -355,3 +362,8 @@ bool LoopPass::skipLoop(const Loop *L) const { } return false; } + +char LCSSAVerificationPass::ID = 0; +INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier", + false, false) + diff --git a/contrib/llvm/lib/Analysis/LoopPassManager.cpp b/contrib/llvm/lib/Analysis/LoopPassManager.cpp deleted file mode 100644 index 8bac19a..0000000 --- a/contrib/llvm/lib/Analysis/LoopPassManager.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//===- LoopPassManager.cpp - Loop pass management -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LoopPassManager.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/IR/Dominators.h" - -using namespace llvm; - -// Explicit instantiations for core typedef'ed templates. 
-namespace llvm { -template class PassManager<Loop>; -template class AnalysisManager<Loop>; -template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; -template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>; -} - -PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - PA.preserve<ScalarEvolutionAnalysis>(); - // TODO: What we really want to do here is preserve an AA category, but that - // concept doesn't exist yet. - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); - return PA; -} diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index f234776..2d82740 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -77,8 +77,12 @@ static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = { // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; +static Function *getCalledFunction(const Value *V, bool LookThroughBitCast, + bool &IsNoBuiltin) { + // Don't care about intrinsics in this case. + if (isa<IntrinsicInst>(V)) + return nullptr; -static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { if (LookThroughBitCast) V = V->stripPointerCasts(); @@ -86,8 +90,7 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { if (!CS.getInstruction()) return nullptr; - if (CS.isNoBuiltin()) - return nullptr; + IsNoBuiltin = CS.isNoBuiltin(); Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) @@ -98,47 +101,19 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { /// Returns the allocation data for the given value if it's either a call to a /// known allocation function, or a call to a function with the allocsize /// attribute. -static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, - const TargetLibraryInfo *TLI, - bool LookThroughBitCast = false) { - // Skip intrinsics - if (isa<IntrinsicInst>(V)) - return None; - - const Function *Callee = getCalledFunction(V, LookThroughBitCast); - if (!Callee) - return None; - - // If it has allocsize, we can skip checking if it's a known function. - // - // MallocLike is chosen here because allocsize makes no guarantees about the - // nullness of the result of the function, nor does it deal with strings, nor - // does it require that the memory returned is zeroed out. - LLVM_CONSTEXPR auto AllocSizeAllocTy = MallocLike; - if ((AllocTy & AllocSizeAllocTy) == AllocSizeAllocTy && - Callee->hasFnAttribute(Attribute::AllocSize)) { - Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize); - std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs(); - - AllocFnsTy Result; - Result.AllocTy = AllocSizeAllocTy; - Result.NumParams = Callee->getNumOperands(); - Result.FstParam = Args.first; - Result.SndParam = Args.second.getValueOr(-1); - return Result; - } - +static Optional<AllocFnsTy> +getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, + const TargetLibraryInfo *TLI) { // Make sure that the function is available. 
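The lookup that continues below is a find_if over the static AllocationFnData table keyed by the library-function enum. The same pattern in self-contained C++ looks like this (the enum values and fields are invented for illustration, not the real table):

#include <algorithm>
#include <iterator>
#include <utility>

enum class LibFn { Malloc, Calloc, Realloc };

struct AllocInfo { unsigned numParams; int sizeParam; };

// Static table keyed by the library-function enum, like AllocationFnData.
static const std::pair<LibFn, AllocInfo> kAllocTable[] = {
    {LibFn::Malloc, {1, 0}},
    {LibFn::Calloc, {2, 0}},
    {LibFn::Realloc, {2, 1}},
};

// Linear lookup with find_if; returns nullptr when the function is unknown.
const AllocInfo *lookupAllocInfo(LibFn fn) {
  auto it = std::find_if(std::begin(kAllocTable), std::end(kAllocTable),
                         [fn](const std::pair<LibFn, AllocInfo> &p) {
                           return p.first == fn;
                         });
  return it == std::end(kAllocTable) ? nullptr : &it->second;
}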
StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return None; - const auto *Iter = - std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData), - [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) { - return P.first == TLIFn; - }); + const auto *Iter = find_if( + AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) { + return P.first == TLIFn; + }); if (Iter == std::end(AllocationFnData)) return None; @@ -164,6 +139,48 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, return None; } +static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, + const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false) { + bool IsNoBuiltinCall; + if (const Function *Callee = + getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (!IsNoBuiltinCall) + return getAllocationDataForFunction(Callee, AllocTy, TLI); + return None; +} + +static Optional<AllocFnsTy> getAllocationSize(const Value *V, + const TargetLibraryInfo *TLI) { + bool IsNoBuiltinCall; + const Function *Callee = + getCalledFunction(V, /*LookThroughBitCast=*/false, IsNoBuiltinCall); + if (!Callee) + return None; + + // Prefer to use existing information over allocsize. This will give us an + // accurate AllocTy. + if (!IsNoBuiltinCall) + if (Optional<AllocFnsTy> Data = + getAllocationDataForFunction(Callee, AnyAlloc, TLI)) + return Data; + + Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize); + if (Attr == Attribute()) + return None; + + std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs(); + + AllocFnsTy Result; + // Because allocsize only tells us how many bytes are allocated, we're not + // really allowed to assume anything, so we use MallocLike. + Result.AllocTy = MallocLike; + Result.NumParams = Callee->getNumOperands(); + Result.FstParam = Args.first; + Result.SndParam = Args.second.getValueOr(-1); + return Result; +} + static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias); @@ -389,6 +406,36 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, return true; } +ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + bool MustSucceed) { + assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize && + "ObjectSize must be a call to llvm.objectsize!"); + + bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero(); + ObjSizeMode Mode; + // Unless we have to fold this to something, try to be as accurate as + // possible. + if (MustSucceed) + Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min; + else + Mode = ObjSizeMode::Exact; + + // FIXME: Does it make sense to just return a failure value if the size won't + // fit in the output and `!MustSucceed`? + uint64_t Size; + auto *ResultType = cast<IntegerType>(ObjectSize->getType()); + if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) && + isUIntN(ResultType->getBitWidth(), Size)) + return ConstantInt::get(ResultType, Size); + + if (!MustSucceed) + return nullptr; + + return ConstantInt::get(ResultType, MaxVal ? 
-1ULL : 0); +} + STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); STATISTIC(ObjectVisitorLoad, @@ -476,8 +523,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { - Optional<AllocFnsTy> FnData = - getAllocationData(CS.getInstruction(), AnyAlloc, TLI); + Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI); if (!FnData) return unknown(); @@ -736,8 +782,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { - Optional<AllocFnsTy> FnData = - getAllocationData(CS.getInstruction(), AnyAlloc, TLI); + Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI); if (!FnData) return unknown(); diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 3349933..66a0d14 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -15,24 +15,38 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PredIteratorCache.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cassert> +#include <iterator> + using namespace llvm; #define DEBUG_TYPE "memdep" @@ -166,7 +180,7 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( BasicBlock *BB) { unsigned Limit = BlockScanLimit; - // Walk backwards through the block, looking for dependencies + // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. @@ -220,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( return MemDepResult::getNonFuncLocal(); } -/// Return true if LI is a load that would fully overlap MemLoc if done as -/// a wider legal integer load. -/// -/// MemLocBase, MemLocOffset are lazily computed here the first time the -/// base/offs of memloc is needed. -static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc, - const Value *&MemLocBase, - int64_t &MemLocOffs, - const LoadInst *LI) { - const DataLayout &DL = LI->getModule()->getDataLayout(); - - // If we haven't already computed the base/offset of MemLoc, do so now. 
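The lowerObjectSizeCall routine that closes at the top of this hunk stays exact while it is still allowed to leave the intrinsic in place, switches to a min or max estimate once the call must be folded, and falls back to the conventional unknown constants (all ones for max, zero for min). A stripped-down sketch of that decision, with the actual size query abstracted away (assumed helper, not the LLVM function):

#include <cstdint>
#include <optional>

enum class ObjSizeMode { Exact, Min, Max };

// Mirror of the mode/fallback choice: be exact while we can still punt, pick
// Min or Max once folding is mandatory, and use -1/0 as the failure values.
std::optional<uint64_t> foldObjectSize(std::optional<uint64_t> knownSize,
                                       bool maxVal, bool mustSucceed) {
  ObjSizeMode mode = !mustSucceed ? ObjSizeMode::Exact
                                  : (maxVal ? ObjSizeMode::Max
                                            : ObjSizeMode::Min);
  (void)mode;            // a real query would pass the mode to the size walk
  if (knownSize)
    return knownSize;    // the size was computable: fold to it
  if (!mustSucceed)
    return std::nullopt; // leave the intrinsic in place for a later pass
  return maxVal ? ~uint64_t(0) : uint64_t(0);
}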
- if (!MemLocBase) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); - - unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( - MemLocBase, MemLocOffs, MemLoc.Size, LI); - return Size != 0; -} - unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI) { @@ -292,7 +286,7 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - while (1) { + while (true) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. if (NewLoadByteSize > LoadAlign || @@ -327,80 +321,129 @@ static bool isVolatile(Instruction *Inst) { MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { - MemDepResult invariantGroupDependency = - getInvariantGroupPointerDependency(LI, BB); + InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB); - if (invariantGroupDependency.isDef()) - return invariantGroupDependency; + if (InvariantGroupDependency.isDef()) + return InvariantGroupDependency; } } - return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); + MemDepResult SimpleDep = getSimplePointerDependencyFrom( + MemLoc, isLoad, ScanIt, BB, QueryInst, Limit); + if (SimpleDep.isDef()) + return SimpleDep; + // Non-local invariant group dependency indicates there is non local Def + // (it only returns nonLocal if it finds nonLocal def), which is better than + // local clobber and everything else. + if (InvariantGroupDependency.isNonLocal()) + return InvariantGroupDependency; + + assert(InvariantGroupDependency.isUnknown() && + "InvariantGroupDependency should be only unknown at this point"); + return SimpleDep; } MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, - BasicBlock *BB) { - Value *LoadOperand = LI->getPointerOperand(); - // It's is not safe to walk the use list of global value, because function - // passes aren't allowed to look outside their functions. - if (isa<GlobalValue>(LoadOperand)) - return MemDepResult::getUnknown(); + BasicBlock *BB) { auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); if (!InvariantGroupMD) return MemDepResult::getUnknown(); - MemDepResult Result = MemDepResult::getUnknown(); - llvm::SmallSet<Value *, 14> Seen; + // Take the ptr operand after all casts and geps 0. This way we can search + // cast graph down only. + Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts(); + + // It's is not safe to walk the use list of global value, because function + // passes aren't allowed to look outside their functions. + // FIXME: this could be fixed by filtering instructions from outside + // of current function. + if (isa<GlobalValue>(LoadOperand)) + return MemDepResult::getUnknown(); + // Queue to process all pointers that are equivalent to load operand. 
- llvm::SmallVector<Value *, 8> LoadOperandsQueue; + SmallVector<const Value *, 8> LoadOperandsQueue; LoadOperandsQueue.push_back(LoadOperand); - while (!LoadOperandsQueue.empty()) { - Value *Ptr = LoadOperandsQueue.pop_back_val(); - if (isa<GlobalValue>(Ptr)) - continue; - if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { - if (Seen.insert(BCI->getOperand(0)).second) { - LoadOperandsQueue.push_back(BCI->getOperand(0)); - } - } + Instruction *ClosestDependency = nullptr; + // Order of instructions in uses list is unpredictible. In order to always + // get the same result, we will look for the closest dominance. + auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) { + assert(Other && "Must call it with not null instruction"); + if (Best == nullptr || DT.dominates(Best, Other)) + return Other; + return Best; + }; + - for (Use &Us : Ptr->uses()) { + // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case + // we will see all the instructions. This should be fixed in MSSA. + while (!LoadOperandsQueue.empty()) { + const Value *Ptr = LoadOperandsQueue.pop_back_val(); + assert(Ptr && !isa<GlobalValue>(Ptr) && + "Null or GlobalValue should not be inserted"); + + for (const Use &Us : Ptr->uses()) { auto *U = dyn_cast<Instruction>(Us.getUser()); if (!U || U == LI || !DT.dominates(U, LI)) continue; - if (auto *BCI = dyn_cast<BitCastInst>(U)) { - if (Seen.insert(BCI).second) { - LoadOperandsQueue.push_back(BCI); - } + // Bitcast or gep with zeros are using Ptr. Add to queue to check it's + // users. U = bitcast Ptr + if (isa<BitCastInst>(U)) { + LoadOperandsQueue.push_back(U); continue; } + // Gep with zeros is equivalent to bitcast. + // FIXME: we are not sure if some bitcast should be canonicalized to gep 0 + // or gep 0 to bitcast because of SROA, so there are 2 forms. When + // typeless pointers will be ready then both cases will be gone + // (and this BFS also won't be needed). + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) + if (GEP->hasAllZeroIndices()) { + LoadOperandsQueue.push_back(U); + continue; + } + // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && + if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) - return MemDepResult::getDef(U); + ClosestDependency = GetClosestDependency(ClosestDependency, U); } } - return Result; + + if (!ClosestDependency) + return MemDepResult::getUnknown(); + if (ClosestDependency->getParent() == BB) + return MemDepResult::getDef(ClosestDependency); + // Def(U) can't be returned here because it is non-local. If local + // dependency won't be found then return nonLocal counting that the + // user will call getNonLocalPointerDependency, which will return cached + // result. 
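The rewritten walk above treats bitcasts and all-zero GEPs as the same pointer and, because use-list order is not deterministic, keeps whichever candidate is dominated by every candidate kept so far, i.e. the def closest to the load. A minimal standalone sketch of that selection rule follows; it models dominance as position within a single block and uses invented names (ToyInst, pickClosest), not LLVM's DominatorTree API.

#include <cstdio>
#include <vector>

// Toy stand-in: an "instruction" is just a position in one block, so A
// dominates B exactly when A comes earlier.
struct ToyInst { int Position; };

static bool dominates(const ToyInst &A, const ToyInst &B) {
  return A.Position < B.Position;
}

// Mirror of the closest-dominance tie-break: among candidates discovered in
// arbitrary order, replace the current best whenever it dominates the new
// candidate, so the latest def that still precedes the query wins.
static const ToyInst *pickClosest(const std::vector<ToyInst> &Candidates) {
  const ToyInst *Best = nullptr;
  for (const ToyInst &C : Candidates)
    if (!Best || dominates(*Best, C))
      Best = &C; // C is closer to the query point than Best.
  return Best;
}

int main() {
  std::vector<ToyInst> Candidates = {{3}, {7}, {5}}; // arbitrary discovery order
  if (const ToyInst *Closest = pickClosest(Candidates))
    std::printf("closest candidate at position %d\n", Closest->Position);
  return 0;
}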
+ NonLocalDefsCache.try_emplace( + LI, NonLocalDepResult(ClosestDependency->getParent(), + MemDepResult::getDef(ClosestDependency), nullptr)); + return MemDepResult::getNonLocal(); } MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst) { - - const Value *MemLocBase = nullptr; - int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { bool isInvariantLoad = false; + if (!Limit) { + unsigned DefaultLimit = BlockScanLimit; + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, + &DefaultLimit); + } + // We must be careful with atomic accesses, as they may allow another thread // to touch this location, clobbering it. We are conservative: if the // QueryInst is not a simple (non-atomic) memory access, we automatically @@ -474,8 +517,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. - --Limit; - if (!Limit) + --*Limit; + if (!*Limit) return MemDepResult::getUnknown(); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -530,21 +573,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( AliasResult R = AA.alias(LoadLoc, MemLoc); if (isLoad) { - if (R == NoAlias) { - // If this is an over-aligned integer load (for example, - // "load i8* %P, align 4") see if it would obviously overlap with the - // queried location if widened to a larger load (e.g. if the queried - // location is 1 byte at P+1). If so, return it as a load/load - // clobber result, allowing the client to decide to widen the load if - // it wants to. - if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { - if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() && - isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, - MemLocOffset, LI)) - return MemDepResult::getClobber(Inst); - } + if (R == NoAlias) continue; - } // Must aliased loads are defs of each other. if (R == MustAlias) @@ -697,7 +727,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { - // No dependence found. If this is the entry block of the function, it is + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (QueryParent != &QueryParent->getParent()->getEntryBlock()) LocalCache = MemDepResult::getNonLocal(); @@ -709,7 +739,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. bool isLoad = !(MR & MRI_Mod); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) + if (auto *II = dyn_cast<IntrinsicInst>(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom( @@ -884,7 +914,17 @@ void MemoryDependenceResults::getNonLocalPointerDependency( assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + { + // Check if there is cached Def with invariant.group. FIXME: cache might be + // invalid if cached instruction would be removed between call to + // getPointerDependencyFrom and this function. 
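The Limit pointer threaded through getSimplePointerDependencyFrom above lets callers share one instruction-scanning budget across several queries, while a null pointer falls back to a private per-call default by re-invoking with a local counter. Below is a self-contained sketch of that pattern with invented names (scanForDependency, DefaultScanLimit); it illustrates the budgeting idea only, not the MemoryDependenceResults interface.

#include <cstdio>

namespace {
constexpr unsigned DefaultScanLimit = 4;

// Scans up to *Limit items; a null Limit means "use a private default budget".
// The budget is decremented through the pointer so a caller issuing several
// related queries can pass the same counter to all of them.
int scanForDependency(const int *Items, unsigned Count, int Wanted,
                      unsigned *Limit = nullptr) {
  if (!Limit) {
    unsigned PrivateLimit = DefaultScanLimit;
    return scanForDependency(Items, Count, Wanted, &PrivateLimit);
  }
  for (unsigned I = 0; I != Count; ++I) {
    if (*Limit == 0)
      return -2;            // budget exhausted: answer is "unknown"
    --*Limit;
    if (Items[I] == Wanted)
      return static_cast<int>(I);
  }
  return -1;                // scanned everything, no dependency found
}
} // namespace

int main() {
  const int Block[] = {10, 20, 30, 40, 50, 60};
  unsigned Shared = 5;      // one budget shared by two queries
  std::printf("%d\n", scanForDependency(Block, 6, 30, &Shared)); // 2
  std::printf("%d\n", scanForDependency(Block, 6, 60, &Shared)); // -2, budget spent
  std::printf("%d\n", scanForDependency(Block, 6, 60));          // -2, default too small
  return 0;
}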
+ auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst); + if (NonLocalDefIt != NonLocalDefsCache.end()) { + Result.push_back(std::move(NonLocalDefIt->second)); + NonLocalDefsCache.erase(NonLocalDefIt); + return; + } + } // This routine does not expect to deal with volatile instructions. // Doing so would require piping through the QueryInst all the way through. // TODO: volatiles can't be elided, but they can be reordered with other @@ -1010,7 +1050,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, MemoryDependenceResults::NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.end() - 1, Val); Cache.insert(Entry, Val); - // FALL THROUGH. + LLVM_FALLTHROUGH; } case 1: // One new entry, Just insert the new value at the appropriate position. @@ -1659,10 +1699,10 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const { #endif } -char MemoryDependenceAnalysis::PassID; +AnalysisKey MemoryDependenceAnalysis::Key; MemoryDependenceResults -MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) { +MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult<AAManager>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); @@ -1684,6 +1724,7 @@ INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep", MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) { initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry()); } + MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {} void MemoryDependenceWrapperPass::releaseMemory() { @@ -1698,6 +1739,28 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } +bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Check whether our analysis is preserved. + auto PAC = PA.getChecker<MemoryDependenceAnalysis>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) + // If not, give up now. + return true; + + // Check whether the analyses we depend on became invalid for any reason. + if (Inv.invalidate<AAManager>(F, PA) || + Inv.invalidate<AssumptionAnalysis>(F, PA) || + Inv.invalidate<DominatorTreeAnalysis>(F, PA)) + return true; + + // Otherwise this analysis result remains valid. + return false; +} + +unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { + return BlockScanLimit; +} + bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 36c4714..f675830 100644 --- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -74,7 +74,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { // filenames), so just print a few useful things. 
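The invalidate() override added above for MemoryDependenceResults follows the new pass manager's rule: a cached result survives only if it was preserved (directly or via an all-analyses set) and every analysis it depends on also survives. The toy model below uses plain string keys instead of AnalysisKey and a flat preserved set instead of the recursive Invalidator, so it only approximates the real dependency check; all names are illustrative.

#include <cstdio>
#include <set>
#include <string>
#include <vector>

using PreservedSet = std::set<std::string>; // analyses a pass declared preserved

struct ToyResult {
  std::string Name;
  std::vector<std::string> Dependencies;

  // Returns true when this cached result must be thrown away.
  bool invalidate(const PreservedSet &Preserved) const {
    // Not preserved at all: give up immediately.
    if (!Preserved.count(Name) && !Preserved.count("<all>"))
      return true;
    // Preserved, but a dependency went stale: our data may be stale too.
    for (const std::string &Dep : Dependencies)
      if (!Preserved.count(Dep) && !Preserved.count("<all>"))
        return true;
    // Everything we rely on is still valid, so we are too.
    return false;
  }
};

int main() {
  ToyResult MemDep{"memdep", {"aa", "assumptions", "domtree"}};
  PreservedSet AfterCSE{"memdep", "aa", "assumptions", "domtree"};
  PreservedSet AfterCFGRewrite{"memdep", "aa", "assumptions"}; // domtree dropped
  std::printf("after CSE: %s\n",
              MemDep.invalidate(AfterCSE) ? "invalidated" : "kept");
  std::printf("after CFG rewrite: %s\n",
              MemDep.invalidate(AfterCFGRewrite) ? "invalidated" : "kept");
  return 0;
}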
for (DICompileUnit *CU : Finder.compile_units()) { O << "Compile unit: "; - if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage())) + auto Lang = dwarf::LanguageString(CU->getSourceLanguage()); + if (!Lang.empty()) O << Lang; else O << "unknown-language(" << CU->getSourceLanguage() << ")"; @@ -90,7 +91,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { O << '\n'; } - for (const DIGlobalVariable *GV : Finder.global_variables()) { + for (auto GVU : Finder.global_variables()) { + const auto *GV = GVU->getVariable(); O << "Global variable: " << GV->getName(); printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); if (!GV->getLinkageName().empty()) @@ -105,14 +107,15 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { printFile(O, T->getFilename(), T->getDirectory(), T->getLine()); if (auto *BT = dyn_cast<DIBasicType>(T)) { O << " "; - if (const char *Encoding = - dwarf::AttributeEncodingString(BT->getEncoding())) + auto Encoding = dwarf::AttributeEncodingString(BT->getEncoding()); + if (!Encoding.empty()) O << Encoding; else O << "unknown-encoding(" << BT->getEncoding() << ')'; } else { O << ' '; - if (const char *Tag = dwarf::TagString(T->getTag())) + auto Tag = dwarf::TagString(T->getTag()); + if (!Tag.empty()) O << Tag; else O << "unknown-tag(" << T->getTag() << ")"; diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index c9ac2bd..f5ba637 100644 --- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -13,16 +13,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Pass.h" using namespace llvm; @@ -31,7 +37,7 @@ using namespace llvm; // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). -static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges, +static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges, SmallPtrSet<const User *, 8> &Visited) { SmallVector<const User *, 32> Worklist; Worklist.push_back(CurUser); @@ -50,12 +56,12 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges, continue; if (isa<BlockAddress>(Operand)) continue; - if (isa<GlobalValue>(Operand)) { + if (auto *GV = dyn_cast<GlobalValue>(Operand)) { // We have a reference to a global value. This should be added to // the reference set unless it is a callee. Callees are handled // specially by WriteFunction and are added to a separate list. 
if (!(CS && CS.isCallee(&OI))) - RefEdges.insert(Operand); + RefEdges.insert(GV); continue; } Worklist.push_back(Operand); @@ -63,98 +69,213 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges, } } -void ModuleSummaryIndexBuilder::computeFunctionSummary( - const Function &F, BlockFrequencyInfo *BFI) { - // Summary not currently supported for anonymous functions, they must - // be renamed. - if (!F.hasName()) - return; +static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, + ProfileSummaryInfo *PSI) { + if (!PSI) + return CalleeInfo::HotnessType::Unknown; + if (PSI->isHotCount(ProfileCount)) + return CalleeInfo::HotnessType::Hot; + if (PSI->isColdCount(ProfileCount)) + return CalleeInfo::HotnessType::Cold; + return CalleeInfo::HotnessType::None; +} + +static bool isNonRenamableLocal(const GlobalValue &GV) { + return GV.hasSection() && GV.hasLocalLinkage(); +} + +static void +computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, + const Function &F, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool HasLocalsInUsed, + DenseSet<GlobalValue::GUID> &CantBePromoted) { + // Summary not currently supported for anonymous functions, they should + // have been named. + assert(F.hasName()); unsigned NumInsts = 0; // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. - DenseMap<const Value *, CalleeInfo> CallGraphEdges; - DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges; - DenseSet<const Value *> RefEdges; + MapVector<ValueInfo, CalleeInfo> CallGraphEdges; + SetVector<ValueInfo> RefEdges; + SetVector<GlobalValue::GUID> TypeTests; ICallPromotionAnalysis ICallAnalysis; + bool HasInlineAsmMaybeReferencingInternal = false; SmallPtrSet<const User *, 8> Visited; for (const BasicBlock &BB : F) for (const Instruction &I : BB) { - if (!isa<DbgInfoIntrinsic>(I)) - ++NumInsts; - - if (auto CS = ImmutableCallSite(&I)) { - auto *CalledFunction = CS.getCalledFunction(); - // Check if this is a direct call to a known function. - if (CalledFunction) { - if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) { - auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; - auto *CalleeId = - M->getValueSymbolTable().lookup(CalledFunction->getName()); - CallGraphEdges[CalleeId] += - (ScaledCount ? ScaledCount.getValue() : 0); - } - } else { - // Otherwise, check for an indirect call (call to a non-const value - // that isn't an inline assembly call). - const CallInst *CI = dyn_cast<CallInst>(&I); - if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) && - !(CI && CI->isInlineAsm())) { - uint32_t NumVals, NumCandidates; - uint64_t TotalCount; - auto CandidateProfileData = - ICallAnalysis.getPromotionCandidatesForInstruction( - &I, NumVals, TotalCount, NumCandidates); - for (auto &Candidate : CandidateProfileData) - IndirectCallEdges[Candidate.Value] += Candidate.Count; + if (isa<DbgInfoIntrinsic>(I)) + continue; + ++NumInsts; + findRefEdges(&I, RefEdges, Visited); + auto CS = ImmutableCallSite(&I); + if (!CS) + continue; + + const auto *CI = dyn_cast<CallInst>(&I); + // Since we don't know exactly which local values are referenced in inline + // assembly, conservatively mark the function as possibly referencing + // a local value from inline assembly to ensure we don't export a + // reference (which would require renaming and promotion of the + // referenced value). 
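getHotness above buckets a raw profile count into Hot, Cold, None or Unknown using the profile summary's thresholds, and the call-graph edges built from it keep only the hottest bucket observed for a callee (the updateHotness calls). A standalone sketch of that classify-and-combine step, with made-up thresholds and names:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

// Ordered so std::max prefers Hot over None over Cold over Unknown.
enum class Hotness { Unknown = 0, Cold, None, Hot };

// Illustrative thresholds standing in for the profile summary's computed ones.
constexpr uint64_t HotCountThreshold = 1000;
constexpr uint64_t ColdCountThreshold = 10;

static Hotness classify(uint64_t Count, bool HaveProfile) {
  if (!HaveProfile)
    return Hotness::Unknown;
  if (Count >= HotCountThreshold)
    return Hotness::Hot;
  if (Count <= ColdCountThreshold)
    return Hotness::Cold;
  return Hotness::None;
}

int main() {
  // Several static call sites of one callee; the edge keeps the hottest
  // classification observed, mirroring the updateHotness idea.
  std::map<std::string, Hotness> CallEdges;
  struct { const char *Callee; uint64_t Count; } Sites[] = {
      {"callee", 5}, {"callee", 4000}, {"callee", 50}};
  for (const auto &S : Sites) {
    Hotness H = classify(S.Count, /*HaveProfile=*/true);
    Hotness &Edge = CallEdges[S.Callee]; // default-initialized to Unknown
    Edge = std::max(Edge, H);            // keep the hottest bucket
  }
  std::printf("edge hotness bucket: %d\n",
              static_cast<int>(CallEdges["callee"])); // 3 == Hot
  return 0;
}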
+ if (HasLocalsInUsed && CI && CI->isInlineAsm()) + HasInlineAsmMaybeReferencingInternal = true; + + auto *CalledValue = CS.getCalledValue(); + auto *CalledFunction = CS.getCalledFunction(); + // Check if this is an alias to a function. If so, get the + // called aliasee for the checks below. + if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { + assert(!CalledFunction && "Expected null called function in callsite for alias"); + CalledFunction = dyn_cast<Function>(GA->getBaseObject()); + } + // Check if this is a direct call to a known function or a known + // intrinsic, or an indirect call with profile data. + if (CalledFunction) { + if (CalledFunction->isIntrinsic()) { + if (CalledFunction->getIntrinsicID() != Intrinsic::type_test) + continue; + // Produce a summary from type.test intrinsics. We only summarize + // type.test intrinsics that are used other than by an llvm.assume + // intrinsic. Intrinsics that are assumed are relevant only to the + // devirtualization pass, not the type test lowering pass. + bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) { + auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser()); + if (!AssumeCI) + return true; + Function *F = AssumeCI->getCalledFunction(); + return !F || F->getIntrinsicID() != Intrinsic::assume; + }); + if (HasNonAssumeUses) { + auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); + if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata())) + TypeTests.insert(GlobalValue::getGUID(TypeId->getString())); } } + // We should have named any anonymous globals + assert(CalledFunction->hasName()); + auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; + auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) + : CalleeInfo::HotnessType::Unknown; + + // Use the original CalledValue, in case it was an alias. We want + // to record the call edge to the alias in that case. Eventually + // an alias summary will be created to associate the alias and + // aliasee. + CallGraphEdges[cast<GlobalValue>(CalledValue)].updateHotness(Hotness); + } else { + // Skip inline assembly calls. + if (CI && CI->isInlineAsm()) + continue; + // Skip direct calls. + if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue())) + continue; + + uint32_t NumVals, NumCandidates; + uint64_t TotalCount; + auto CandidateProfileData = + ICallAnalysis.getPromotionCandidatesForInstruction( + &I, NumVals, TotalCount, NumCandidates); + for (auto &Candidate : CandidateProfileData) + CallGraphEdges[Candidate.Value].updateHotness( + getHotness(Candidate.Count, PSI)); } - findRefEdges(&I, RefEdges, Visited); } - GlobalValueSummary::GVFlags Flags(F); - std::unique_ptr<FunctionSummary> FuncSummary = - llvm::make_unique<FunctionSummary>(Flags, NumInsts); - FuncSummary->addCallGraphEdges(CallGraphEdges); - FuncSummary->addCallGraphEdges(IndirectCallEdges); - FuncSummary->addRefEdges(RefEdges); - Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary)); + bool NonRenamableLocal = isNonRenamableLocal(F); + bool NotEligibleForImport = + NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || + // Inliner doesn't handle variadic functions. + // FIXME: refactor this to use the same code that inliner is using. 
+ F.isVarArg(); + GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, + /* LiveRoot = */ false); + auto FuncSummary = llvm::make_unique<FunctionSummary>( + Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), + TypeTests.takeVector()); + if (NonRenamableLocal) + CantBePromoted.insert(F.getGUID()); + Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary)); } -void ModuleSummaryIndexBuilder::computeVariableSummary( - const GlobalVariable &V) { - DenseSet<const Value *> RefEdges; +static void +computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, + DenseSet<GlobalValue::GUID> &CantBePromoted) { + SetVector<ValueInfo> RefEdges; SmallPtrSet<const User *, 8> Visited; findRefEdges(&V, RefEdges, Visited); - GlobalValueSummary::GVFlags Flags(V); - std::unique_ptr<GlobalVarSummary> GVarSummary = - llvm::make_unique<GlobalVarSummary>(Flags); - GVarSummary->addRefEdges(RefEdges); - Index->addGlobalValueSummary(V.getName(), std::move(GVarSummary)); + bool NonRenamableLocal = isNonRenamableLocal(V); + GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, + /* LiveRoot = */ false); + auto GVarSummary = + llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector()); + if (NonRenamableLocal) + CantBePromoted.insert(V.getGUID()); + Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary)); } -ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder( - const Module *M, - std::function<BlockFrequencyInfo *(const Function &F)> Ftor) - : Index(llvm::make_unique<ModuleSummaryIndex>()), M(M) { - // Check if the module can be promoted, otherwise just disable importing from - // it by not emitting any summary. - // FIXME: we could still import *into* it most of the time. - if (!moduleCanBeRenamedForThinLTO(*M)) +static void +computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, + DenseSet<GlobalValue::GUID> &CantBePromoted) { + bool NonRenamableLocal = isNonRenamableLocal(A); + GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, + /* LiveRoot = */ false); + auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{}); + auto *Aliasee = A.getBaseObject(); + auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); + assert(AliaseeSummary && "Alias expects aliasee summary to be parsed"); + AS->setAliasee(AliaseeSummary); + if (NonRenamableLocal) + CantBePromoted.insert(A.getGUID()); + Index.addGlobalValueSummary(A.getName(), std::move(AS)); +} + +// Set LiveRoot flag on entries matching the given value name. +static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { + auto SummaryList = + Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name)); + if (SummaryList == Index.end()) return; + for (auto &Summary : SummaryList->second) + Summary->setLiveRoot(); +} + +ModuleSummaryIndex llvm::buildModuleSummaryIndex( + const Module &M, + std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, + ProfileSummaryInfo *PSI) { + ModuleSummaryIndex Index; + + // Identify the local values in the llvm.used and llvm.compiler.used sets, + // which should not be exported as they would then require renaming and + // promotion, but we may have opaque uses e.g. in inline asm. We collect them + // here because we use this information to mark functions containing inline + // assembly calls as not importable. + SmallPtrSet<GlobalValue *, 8> LocalsUsed; + SmallPtrSet<GlobalValue *, 8> Used; + // First collect those in the llvm.used set. 
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); + // Next collect those in the llvm.compiler.used set. + collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true); + DenseSet<GlobalValue::GUID> CantBePromoted; + for (auto *V : Used) { + if (V->hasLocalLinkage()) { + LocalsUsed.insert(V); + CantBePromoted.insert(V->getGUID()); + } + } // Compute summaries for all functions defined in module, and save in the // index. - for (auto &F : *M) { + for (auto &F : M) { if (F.isDeclaration()) continue; BlockFrequencyInfo *BFI = nullptr; std::unique_ptr<BlockFrequencyInfo> BFIPtr; - if (Ftor) - BFI = Ftor(F); + if (GetBFICallback) + BFI = GetBFICallback(F); else if (F.getEntryCount().hasValue()) { LoopInfo LI{DominatorTree(const_cast<Function &>(F))}; BranchProbabilityInfo BPI{F, LI}; @@ -162,22 +283,129 @@ ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder( BFI = BFIPtr.get(); } - computeFunctionSummary(F, BFI); + computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(), + CantBePromoted); } // Compute summaries for all variables defined in module, and save in the // index. - for (const GlobalVariable &G : M->globals()) { + for (const GlobalVariable &G : M.globals()) { if (G.isDeclaration()) continue; - computeVariableSummary(G); + computeVariableSummary(Index, G, CantBePromoted); + } + + // Compute summaries for all aliases defined in module, and save in the + // index. + for (const GlobalAlias &A : M.aliases()) + computeAliasSummary(Index, A, CantBePromoted); + + for (auto *V : LocalsUsed) { + auto *Summary = Index.getGlobalValueSummary(*V); + assert(Summary && "Missing summary for global value"); + Summary->setNotEligibleToImport(); + } + + // The linker doesn't know about these LLVM produced values, so we need + // to flag them as live in the index to ensure index-based dead value + // analysis treats them as live roots of the analysis. + setLiveRoot(Index, "llvm.used"); + setLiveRoot(Index, "llvm.compiler.used"); + setLiveRoot(Index, "llvm.global_ctors"); + setLiveRoot(Index, "llvm.global_dtors"); + setLiveRoot(Index, "llvm.global.annotations"); + + if (!M.getModuleInlineAsm().empty()) { + // Collect the local values defined by module level asm, and set up + // summaries for these symbols so that they can be marked as NoRename, + // to prevent export of any use of them in regular IR that would require + // renaming within the module level asm. Note we don't need to create a + // summary for weak or global defs, as they don't need to be flagged as + // NoRename, and defs in module level asm can't be imported anyway. + // Also, any values used but not defined within module level asm should + // be listed on the llvm.used or llvm.compiler.used global and marked as + // referenced from there. + ModuleSymbolTable::CollectAsmSymbols( + Triple(M.getTargetTriple()), M.getModuleInlineAsm(), + [&M, &Index, &CantBePromoted](StringRef Name, + object::BasicSymbolRef::Flags Flags) { + // Symbols not marked as Weak or Global are local definitions. + if (Flags & (object::BasicSymbolRef::SF_Weak | + object::BasicSymbolRef::SF_Global)) + return; + GlobalValue *GV = M.getNamedValue(Name); + if (!GV) + return; + assert(GV->isDeclaration() && "Def in module asm already has definition"); + GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, + /* NotEligibleToImport */ true, + /* LiveRoot */ true); + CantBePromoted.insert(GlobalValue::getGUID(Name)); + // Create the appropriate summary type. 
+ if (isa<Function>(GV)) { + std::unique_ptr<FunctionSummary> Summary = + llvm::make_unique<FunctionSummary>( + GVFlags, 0, ArrayRef<ValueInfo>{}, + ArrayRef<FunctionSummary::EdgeTy>{}, + ArrayRef<GlobalValue::GUID>{}); + Index.addGlobalValueSummary(Name, std::move(Summary)); + } else { + std::unique_ptr<GlobalVarSummary> Summary = + llvm::make_unique<GlobalVarSummary>(GVFlags, + ArrayRef<ValueInfo>{}); + Index.addGlobalValueSummary(Name, std::move(Summary)); + } + }); } + + for (auto &GlobalList : Index) { + assert(GlobalList.second.size() == 1 && + "Expected module's index to have one summary per GUID"); + auto &Summary = GlobalList.second[0]; + bool AllRefsCanBeExternallyReferenced = + llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { + return !CantBePromoted.count(VI.getValue()->getGUID()); + }); + if (!AllRefsCanBeExternallyReferenced) { + Summary->setNotEligibleToImport(); + continue; + } + + if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) { + bool AllCallsCanBeExternallyReferenced = llvm::all_of( + FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) { + auto GUID = Edge.first.isGUID() ? Edge.first.getGUID() + : Edge.first.getValue()->getGUID(); + return !CantBePromoted.count(GUID); + }); + if (!AllCallsCanBeExternallyReferenced) + Summary->setNotEligibleToImport(); + } + } + + return Index; +} + +AnalysisKey ModuleSummaryIndexAnalysis::Key; + +ModuleSummaryIndex +ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + return buildModuleSummaryIndex( + M, + [&FAM](const Function &F) { + return &FAM.getResult<BlockFrequencyAnalysis>( + *const_cast<Function *>(&F)); + }, + &PSI); } char ModuleSummaryIndexWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) @@ -191,59 +419,25 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() } bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { - IndexBuilder = llvm::make_unique<ModuleSummaryIndexBuilder>( - &M, [this](const Function &F) { + auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + Index = buildModuleSummaryIndex( + M, + [this](const Function &F) { return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>( *const_cast<Function *>(&F)) .getBFI()); - }); + }, + &PSI); return false; } bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) { - IndexBuilder.reset(); + Index.reset(); return false; } void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<BlockFrequencyInfoWrapperPass>(); -} - -bool llvm::moduleCanBeRenamedForThinLTO(const Module &M) { - // We cannot currently promote or rename anything used in inline assembly, - // which are not visible to the compiler. Detect a possible case by looking - // for a llvm.used local value, in conjunction with an inline assembly call - // in the module. Prevent importing of any modules containing these uses by - // suppressing generation of the index. 
This also prevents importing - // into this module, which is also necessary to avoid needing to rename - // in case of a name clash between a local in this module and an imported - // global. - // FIXME: If we find we need a finer-grained approach of preventing promotion - // and renaming of just the functions using inline assembly we will need to: - // - Add flag in the function summaries to identify those with inline asm. - // - Prevent importing of any functions with flag set. - // - Prevent importing of any global function with the same name as a - // function in current module that has the flag set. - // - For any llvm.used value that is exported and promoted, add a private - // alias to the original name in the current module (even if we don't - // export the function using those values in inline asm, another function - // with a reference could be exported). - SmallPtrSet<GlobalValue *, 8> Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - bool LocalIsUsed = - llvm::any_of(Used, [](GlobalValue *V) { return V->hasLocalLinkage(); }); - if (!LocalIsUsed) - return true; - - // Walk all the instructions in the module and find if one is inline ASM - auto HasInlineAsm = llvm::any_of(M, [](const Function &F) { - return llvm::any_of(instructions(F), [](const Instruction &I) { - const CallInst *CallI = dyn_cast<CallInst>(&I); - if (!CallI) - return false; - return CallI->isInlineAsm(); - }); - }); - return !HasInlineAsm; + AU.addRequired<ProfileSummaryInfoWrapperPass>(); } diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index 9bb1048..ed03406 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -131,7 +131,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, return AAResultBase::getModRefInfo(CS, Loc); } -ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> &AM) { +ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) { return ObjCARCAAResult(F.getParent()->getDataLayout()); } diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp index 3dc1463..1e75c08 100644 --- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp +++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp @@ -96,43 +96,47 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { // One argument. const Argument *A0 = &*AI++; - if (AI == AE) + if (AI == AE) { // Argument is a pointer. - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { - Type *ETy = PTy->getElementType(); - // Argument is i8*. - if (ETy->isIntegerTy(8)) + PointerType *PTy = dyn_cast<PointerType>(A0->getType()); + if (!PTy) + return ARCInstKind::CallOrUser; + + Type *ETy = PTy->getElementType(); + // Argument is i8*. 
+ if (ETy->isIntegerTy(8)) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_retain", ARCInstKind::Retain) + .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV) + .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV) + .Case("objc_retainBlock", ARCInstKind::RetainBlock) + .Case("objc_release", ARCInstKind::Release) + .Case("objc_autorelease", ARCInstKind::Autorelease) + .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV) + .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop) + .Case("objc_retainedObject", ARCInstKind::NoopCast) + .Case("objc_unretainedObject", ARCInstKind::NoopCast) + .Case("objc_unretainedPointer", ARCInstKind::NoopCast) + .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease) + .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue", + ARCInstKind::FusedRetainAutoreleaseRV) + .Case("objc_sync_enter", ARCInstKind::User) + .Case("objc_sync_exit", ARCInstKind::User) + .Default(ARCInstKind::CallOrUser); + + // Argument is i8** + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) return StringSwitch<ARCInstKind>(F->getName()) - .Case("objc_retain", ARCInstKind::Retain) - .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV) - .Case("objc_unsafeClaimAutoreleasedReturnValue", - ARCInstKind::ClaimRV) - .Case("objc_retainBlock", ARCInstKind::RetainBlock) - .Case("objc_release", ARCInstKind::Release) - .Case("objc_autorelease", ARCInstKind::Autorelease) - .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV) - .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop) - .Case("objc_retainedObject", ARCInstKind::NoopCast) - .Case("objc_unretainedObject", ARCInstKind::NoopCast) - .Case("objc_unretainedPointer", ARCInstKind::NoopCast) - .Case("objc_retain_autorelease", - ARCInstKind::FusedRetainAutorelease) - .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue", - ARCInstKind::FusedRetainAutoreleaseRV) - .Case("objc_sync_enter", ARCInstKind::User) - .Case("objc_sync_exit", ARCInstKind::User) + .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained) + .Case("objc_loadWeak", ARCInstKind::LoadWeak) + .Case("objc_destroyWeak", ARCInstKind::DestroyWeak) .Default(ARCInstKind::CallOrUser); - // Argument is i8** - if (PointerType *Pte = dyn_cast<PointerType>(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch<ARCInstKind>(F->getName()) - .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained) - .Case("objc_loadWeak", ARCInstKind::LoadWeak) - .Case("objc_destroyWeak", ARCInstKind::DestroyWeak) - .Default(ARCInstKind::CallOrUser); - } + // Anything else with one argument. + return ARCInstKind::CallOrUser; + } // Two arguments, first is i8**. const Argument *A1 = &*AI++; @@ -180,6 +184,7 @@ static bool isInertIntrinsic(unsigned ID) { // TODO: Make this into a covered switch. 
switch (ID) { case Intrinsic::returnaddress: + case Intrinsic::addressofreturnaddress: case Intrinsic::frameaddress: case Intrinsic::stacksave: case Intrinsic::stackrestore: diff --git a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp index e979ba2..fa8b07d 100644 --- a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -13,30 +13,204 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" using namespace llvm; -Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(Value *V) { +OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F) + : F(F), BFI(nullptr) { + if (!F->getContext().getDiagnosticHotnessRequested()) + return; + + // First create a dominator tree. + DominatorTree DT; + DT.recalculate(*F); + + // Generate LoopInfo from it. + LoopInfo LI; + LI.analyze(DT); + + // Then compute BranchProbabilityInfo. + BranchProbabilityInfo BPI; + BPI.calculate(*F, LI); + + // Finally compute BFI. + OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI); + BFI = OwnedBFI.get(); +} + +Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) { if (!BFI) return None; return BFI->getBlockProfileCount(cast<BasicBlock>(V)); } +namespace llvm { +namespace yaml { + +template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> { + static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { + assert(io.outputting() && "input not yet implemented"); + + if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark)) + ; + else if (io.mapTag("!Missed", + OptDiag->getKind() == DK_OptimizationRemarkMissed)) + ; + else if (io.mapTag("!Analysis", + OptDiag->getKind() == DK_OptimizationRemarkAnalysis)) + ; + else if (io.mapTag("!AnalysisFPCommute", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisFPCommute)) + ; + else if (io.mapTag("!AnalysisAliasing", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisAliasing)) + ; + else + llvm_unreachable("todo"); + + // These are read-only for now. + DebugLoc DL = OptDiag->getDebugLoc(); + StringRef FN = GlobalValue::getRealLinkageName( + OptDiag->getFunction().getName()); + + StringRef PassName(OptDiag->PassName); + io.mapRequired("Pass", PassName); + io.mapRequired("Name", OptDiag->RemarkName); + if (!io.outputting() || DL) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FN); + io.mapOptional("Hotness", OptDiag->Hotness); + io.mapOptional("Args", OptDiag->Args); + } +}; + +template <> struct MappingTraits<DebugLoc> { + static void mapping(IO &io, DebugLoc &DL) { + assert(io.outputting() && "input not yet implemented"); + + auto *Scope = cast<DIScope>(DL.getScope()); + StringRef File = Scope->getFilename(); + unsigned Line = DL.getLine(); + unsigned Col = DL.getCol(); + + io.mapRequired("File", File); + io.mapRequired("Line", Line); + io.mapRequired("Column", Col); + } + + static const bool flow = true; +}; + +// Implement this as a mapping for now to get proper quotation for the value. 
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> { + static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { + assert(io.outputting() && "input not yet implemented"); + io.mapRequired(A.Key.data(), A.Val); + if (A.DLoc) + io.mapOptional("DebugLoc", A.DLoc); + } +}; + +} // end namespace yaml +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument) + +void OptimizationRemarkEmitter::computeHotness( + DiagnosticInfoOptimizationBase &OptDiag) { + Value *V = OptDiag.getCodeRegion(); + if (V) + OptDiag.setHotness(computeHotness(V)); +} + +void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) { + computeHotness(OptDiag); + + yaml::Output *Out = F->getContext().getDiagnosticsOutputFile(); + if (Out) { + auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag); + *Out << P; + } + // FIXME: now that IsVerbose is part of DI, filtering for this will be moved + // from here to clang. + if (!OptDiag.isVerbose() || shouldEmitVerbose()) + F->getContext().diagnose(OptDiag); +} + +void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName, + const DebugLoc &DLoc, + const Value *V, + const Twine &Msg) { + LLVMContext &Ctx = F->getContext(); + Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName, + Loop *L, + const Twine &Msg) { + emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg); +} + void OptimizationRemarkEmitter::emitOptimizationRemarkMissed( - const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) { + const char *PassName, const DebugLoc &DLoc, const Value *V, + const Twine &Msg, bool IsVerbose) { LLVMContext &Ctx = F->getContext(); - Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, *F, DLoc, Msg, - computeHotness(V))); + if (!IsVerbose || shouldEmitVerbose()) + Ctx.diagnose( + OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V))); } void OptimizationRemarkEmitter::emitOptimizationRemarkMissed( + const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) { + emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg, + IsVerbose); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis( + const char *PassName, const DebugLoc &DLoc, const Value *V, + const Twine &Msg, bool IsVerbose) { + LLVMContext &Ctx = F->getContext(); + if (!IsVerbose || shouldEmitVerbose()) + Ctx.diagnose( + OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis( + const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) { + emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(), + Msg, IsVerbose); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute( + const char *PassName, const DebugLoc &DLoc, const Value *V, + const Twine &Msg) { + LLVMContext &Ctx = F->getContext(); + Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg, + computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( + const char *PassName, const DebugLoc &DLoc, const Value *V, + const Twine &Msg) { + LLVMContext &Ctx = F->getContext(); + Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg, + computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( const char 
*PassName, Loop *L, const Twine &Msg) { - emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg); + emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(), + L->getHeader(), Msg); } OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() @@ -63,10 +237,11 @@ void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage( AU.setPreservesAll(); } -char OptimizationRemarkEmitterAnalysis::PassID; +AnalysisKey OptimizationRemarkEmitterAnalysis::Key; OptimizationRemarkEmitter -OptimizationRemarkEmitterAnalysis::run(Function &F, AnalysisManager<Function> &AM) { +OptimizationRemarkEmitterAnalysis::run(Function &F, + FunctionAnalysisManager &AM) { BlockFrequencyInfo *BFI; if (F.getContext().getDiagnosticHotnessRequested()) diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index b4aad74..84ecd4a 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -62,8 +62,7 @@ static bool VerifySubExpr(Value *Expr, // If it's an instruction, it is either in Tmp or its operands recursively // are. - SmallVectorImpl<Instruction*>::iterator Entry = - std::find(InstInputs.begin(), InstInputs.end(), I); + SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I); if (Entry != InstInputs.end()) { InstInputs.erase(Entry); return true; @@ -126,8 +125,7 @@ static void RemoveInstInputs(Value *V, if (!I) return; // If the instruction is in the InstInputs list, remove it. - SmallVectorImpl<Instruction*>::iterator Entry = - std::find(InstInputs.begin(), InstInputs.end(), I); + SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I); if (Entry != InstInputs.end()) { InstInputs.erase(Entry); return; @@ -150,8 +148,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. - bool isInput = - std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end(); + bool isInput = is_contained(InstInputs, Inst); // Handle inputs instructions if needed. if (isInput) { @@ -165,7 +162,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // translated, we need to incorporate the value into the expression or fail. // In either case, the instruction itself isn't an input any longer. - InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); + InstInputs.erase(find(InstInputs, Inst)); // If this is a PHI, go ahead and translate it. if (PHINode *PN = dyn_cast<PHINode>(Inst)) @@ -272,8 +269,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, isNSW = isNUW = false; // If the old 'LHS' was an input, add the new 'LHS' as an input. 
- if (std::find(InstInputs.begin(), InstInputs.end(), BOp) != - InstInputs.end()) { + if (is_contained(InstInputs, BOp)) { RemoveInstInputs(BOp, InstInputs); AddAsInput(LHS); } diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp index 7355080..cb9438a 100644 --- a/contrib/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -44,7 +44,7 @@ FunctionPass* llvm::createPostDomTree() { return new PostDominatorTreeWrapperPass(); } -char PostDominatorTreeAnalysis::PassID; +AnalysisKey PostDominatorTreeAnalysis::Key; PostDominatorTree PostDominatorTreeAnalysis::run(Function &F, FunctionAnalysisManager &) { diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 9cf99af..16d3614 100644 --- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -12,7 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" @@ -63,10 +65,10 @@ void ProfileSummaryInfo::computeSummary() { Summary.reset(ProfileSummary::getFromMD(SummaryMD)); } -// Returns true if the function is a hot function. If it returns false, it -// either means it is not hot or it is unknown whether F is hot or not (for -// example, no profile data is available). -bool ProfileSummaryInfo::isHotFunction(const Function *F) { +/// Returns true if the function's entry is hot. If it returns false, it +/// either means it is not hot or it is unknown whether it is hot or not (for +/// example, no profile data is available). +bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { computeSummary(); if (!F || !Summary) return false; @@ -74,15 +76,13 @@ bool ProfileSummaryInfo::isHotFunction(const Function *F) { // FIXME: The heuristic used below for determining hotness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return (FunctionCount && - FunctionCount.getValue() >= - (uint64_t)(0.3 * (double)Summary->getMaxFunctionCount())); + return FunctionCount && isHotCount(FunctionCount.getValue()); } -// Returns true if the function is a cold function. If it returns false, it -// either means it is not cold or it is unknown whether F is cold or not (for -// example, no profile data is available). -bool ProfileSummaryInfo::isColdFunction(const Function *F) { +/// Returns true if the function's entry is a cold. If it returns false, it +/// either means it is not cold or it is unknown whether it is cold or not (for +/// example, no profile data is available). +bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { computeSummary(); if (!F) return false; @@ -95,12 +95,10 @@ bool ProfileSummaryInfo::isColdFunction(const Function *F) { // FIXME: The heuristic used below for determining coldness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return (FunctionCount && - FunctionCount.getValue() <= - (uint64_t)(0.01 * (double)Summary->getMaxFunctionCount())); + return FunctionCount && isColdCount(FunctionCount.getValue()); } -// Compute the hot and cold thresholds. +/// Compute the hot and cold thresholds. 
void ProfileSummaryInfo::computeThresholds() { if (!Summary) computeSummary(); @@ -125,10 +123,22 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } -ProfileSummaryInfo *ProfileSummaryInfoWrapperPass::getPSI(Module &M) { - if (!PSI) - PSI.reset(new ProfileSummaryInfo(M)); - return PSI.get(); +bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(B); + if (Count && isHotCount(*Count)) + return true; + // Use extractProfTotalWeight to get BB count. + // For Sample PGO, BFI may not provide accurate BB count due to errors + // magnified during sample count propagation. This serves as a backup plan + // to ensure all hot BB will not be missed. + // The query currently has false positives as branch instruction cloning does + // not update/scale branch weights. Unlike false negatives, this will not cause + // performance problem. + uint64_t TotalCount; + if (B->getTerminator()->extractProfTotalWeight(TotalCount) && + isHotCount(TotalCount)) + return true; + return false; } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", @@ -139,25 +149,33 @@ ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass() initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } -char ProfileSummaryAnalysis::PassID; +bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) { + PSI.reset(new ProfileSummaryInfo(M)); + return false; +} + +bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) { + PSI.reset(); + return false; +} + +AnalysisKey ProfileSummaryAnalysis::Key; ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M, ModuleAnalysisManager &) { return ProfileSummaryInfo(M); } -// FIXME: This only tests isHotFunction and isColdFunction and not the -// isHotCount and isColdCount calls. 
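isHotBB above consults BlockFrequencyInfo first and, because sample-based profiles can lose block counts during propagation, falls back to the total branch weight on the block's terminator, accepting some false positives. A self-contained sketch of that two-step check, using plain structs in place of BasicBlock/BFI and an invented threshold:

#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

// Toy block: an optional frequency-derived count plus the raw branch weights
// attached to its terminator (standing in for !prof metadata).
struct ToyBlock {
  bool HasBFICount = false;
  uint64_t BFICount = 0;
  std::vector<uint64_t> TerminatorBranchWeights;
};

constexpr uint64_t HotCountThreshold = 1000;

static bool isHotCount(uint64_t C) { return C >= HotCountThreshold; }

static bool isHotBlock(const ToyBlock &B) {
  // Primary source: the count derived from block frequencies.
  if (B.HasBFICount && isHotCount(B.BFICount))
    return true;
  // Backup: the sum of the terminator's branch weights. This can recover hot
  // blocks whose frequency-based count was lost, at the cost of some false
  // positives when branch weights were cloned without rescaling.
  if (!B.TerminatorBranchWeights.empty()) {
    uint64_t Total = std::accumulate(B.TerminatorBranchWeights.begin(),
                                     B.TerminatorBranchWeights.end(),
                                     static_cast<uint64_t>(0));
    if (isHotCount(Total))
      return true;
  }
  return false;
}

int main() {
  ToyBlock LostCount;                            // frequency-based count missing
  LostCount.TerminatorBranchWeights = {800, 700};
  std::printf("%s\n", isHotBlock(LostCount) ? "hot" : "not hot"); // hot
  return 0;
}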
PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M, - AnalysisManager<Module> &AM) { + ModuleAnalysisManager &AM) { ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); OS << "Functions in " << M.getName() << " with hot/cold annotations: \n"; for (auto &F : M) { OS << F.getName(); - if (PSI.isHotFunction(&F)) - OS << " :hot "; - else if (PSI.isColdFunction(&F)) - OS << " :cold "; + if (PSI.isFunctionEntryHot(&F)) + OS << " :hot entry "; + else if (PSI.isFunctionEntryCold(&F)) + OS << " :cold entry "; OS << "\n"; } return PreservedAnalyses::all(); diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 6860a3e..8c084dd 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -54,8 +54,7 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style", clEnumValN(Region::PrintBB, "bb", "print regions in detail with block_iterator"), clEnumValN(Region::PrintRN, "rn", - "print regions in detail with element_iterator"), - clEnumValEnd)); + "print regions in detail with element_iterator"))); //===----------------------------------------------------------------------===// @@ -182,9 +181,9 @@ namespace llvm { // RegionInfoAnalysis implementation // -char RegionInfoAnalysis::PassID; +AnalysisKey RegionInfoAnalysis::Key; -RegionInfo RegionInfoAnalysis::run(Function &F, AnalysisManager<Function> &AM) { +RegionInfo RegionInfoAnalysis::run(Function &F, FunctionAnalysisManager &AM) { RegionInfo RI; auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F); @@ -206,7 +205,7 @@ PreservedAnalyses RegionInfoPrinterPass::run(Function &F, } PreservedAnalyses RegionInfoVerifierPass::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { AM.getResult<RegionInfoAnalysis>(F).verifyAnalysis(); return PreservedAnalyses::all(); diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp index 5e1cdd4..7358aa6 100644 --- a/contrib/llvm/lib/Analysis/RegionPass.cpp +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -10,7 +10,7 @@ // This file implements RegionPass and RGPassManager. All region optimization // and transformation passes are derived from RegionPass. RGPassManager is // responsible for managing RegionPasses. 
-// most of these codes are COPY from LoopPass.cpp +// Most of this code has been COPIED from LoopPass.cpp // //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" @@ -64,9 +64,7 @@ bool RGPassManager::runOnFunction(Function &F) { return false; // Initialization - for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end(); - I != E; ++I) { - Region *R = *I; + for (Region *R : RQ) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { RegionPass *RP = (RegionPass *)getContainedPass(Index); Changed |= RP->doInitialization(R, *this); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 8fefada..ed328f1 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -61,6 +61,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -120,6 +122,21 @@ static cl::opt<bool> cl::desc("Verify no dangling value in ScalarEvolution's " "ExprValueMap (slow)")); +static cl::opt<unsigned> MulOpsInlineThreshold( + "scev-mulops-inline-threshold", cl::Hidden, + cl::desc("Threshold for inlining multiplication operands into a SCEV"), + cl::init(1000)); + +static cl::opt<unsigned> MaxSCEVCompareDepth( + "scalar-evolution-max-scev-compare-depth", cl::Hidden, + cl::desc("Maximum depth of recursive SCEV complexity comparisons"), + cl::init(32)); + +static cl::opt<unsigned> MaxValueCompareDepth( + "scalar-evolution-max-value-compare-depth", cl::Hidden, + cl::desc("Maximum depth of recursive value complexity comparisons"), + cl::init(2)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -447,180 +464,233 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { // SCEV Utilities //===----------------------------------------------------------------------===// -namespace { -/// SCEVComplexityCompare - Return true if the complexity of the LHS is less -/// than the complexity of the RHS. This comparator is used to canonicalize -/// expressions. -class SCEVComplexityCompare { - const LoopInfo *const LI; -public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} +/// Compare the two values \p LV and \p RV in terms of their "complexity" where +/// "complexity" is a partial (and somewhat ad-hoc) relation used to order +/// operands in SCEV expressions. \p EqCache is a set of pairs of values that +/// have been previously deemed to be "equally complex" by this routine. It is +/// intended to avoid exponential time complexity in cases like: +/// +/// %a = f(%x, %y) +/// %b = f(%a, %a) +/// %c = f(%b, %b) +/// +/// %d = f(%x, %y) +/// %e = f(%d, %d) +/// %f = f(%e, %e) +/// +/// CompareValueComplexity(%f, %c) +/// +/// Since we do not continue running this routine on expression trees once we +/// have seen unequal values, there is no need to track them in the cache. 
+static int +CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, + const LoopInfo *const LI, Value *LV, Value *RV, + unsigned Depth) { + if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV})) + return 0; + + // Order pointer values after integer values. This helps SCEVExpander form + // GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; + // Compare getValueID values. + unsigned LID = LV->getValueID(), RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const auto *LA = dyn_cast<Argument>(LV)) { + const auto *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; - - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; - - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast<SCEVTypes>(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); - const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); - - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. - const Value *LV = LU->getValue(), *RV = RU->getValue(); - - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. - bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; - - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; - - // Sort arguments by their position. - if (const Argument *LA = dyn_cast<Argument>(LV)) { - const Argument *RA = cast<Argument>(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } + if (const auto *LGV = dyn_cast<GlobalValue>(LV)) { + const auto *RGV = cast<GlobalValue>(RV); - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { - const Instruction *RInst = cast<Instruction>(RV); - - // Compare loop depths. 
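
CompareValueComplexity above leans on two safeguards: a depth cap and a cache of pairs already proven equally complex, which is what keeps repeated sub-DAGs (the %a..%f example in the comment) from being re-walked exponentially many times. A self-contained sketch of that pattern, on an invented Node type rather than LLVM's Value, follows.

#include <set>
#include <utility>
#include <vector>

// Invented toy expression node, standing in for llvm::Value in this sketch.
struct Node {
  int Kind = 0;
  std::vector<const Node *> Ops;
};

static int compareNodes(std::set<std::pair<const Node *, const Node *>> &EqCache,
                        const Node *L, const Node *R, unsigned Depth = 0) {
  const unsigned MaxDepth = 32; // cheap insurance against pathological inputs
  if (L == R || Depth > MaxDepth || EqCache.count({L, R}))
    return 0; // "equally complex"; the caller's stable sort keeps their order
  if (L->Kind != R->Kind)
    return L->Kind - R->Kind;
  if (L->Ops.size() != R->Ops.size())
    return (int)L->Ops.size() - (int)R->Ops.size();
  for (size_t I = 0, E = L->Ops.size(); I != E; ++I)
    if (int X = compareNodes(EqCache, L->Ops[I], R->Ops[I], Depth + 1))
      return X;
  // Only "equal" verdicts are cached: unequal pairs are never revisited,
  // because the caller stops at the first nonzero result.
  EqCache.insert({L, R});
  return 0;
}
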
- const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } + const auto IsGVNameSemantic = [&](const GlobalValue *GV) { + auto LT = GV->getLinkage(); + return !(GlobalValue::isPrivateLinkage(LT) || + GlobalValue::isInternalLinkage(LT)); + }; - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } + // Use the names to distinguish the two values, but only if the + // names are semantically important. + if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)) + return LGV->getName().compare(RGV->getName()); + } + + // For instructions, compare their loop depth, and their operand count. This + // is pretty loose. + if (const auto *LInst = dyn_cast<Instruction>(LV)) { + const auto *RInst = cast<Instruction>(RV); - return 0; + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; } - case scConstant: { - const SCEVConstant *LC = cast<SCEVConstant>(LHS); - const SCEVConstant *RC = cast<SCEVConstant>(RHS); + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; - // Compare constant values. - const APInt &LA = LC->getAPInt(); - const APInt &RA = RC->getAPInt(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; + for (unsigned Idx : seq(0u, LNumOps)) { + int Result = + CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx), + RInst->getOperand(Idx), Depth + 1); + if (Result != 0) + return Result; } + } - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); - const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + EqCache.insert({LV, RV}); + return 0; +} - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } +// Return negative, zero, or positive, if LHS is less than, equal to, or greater +// than RHS, respectively. A three-way result allows recursive comparisons to be +// more efficient. +static int CompareSCEVComplexity( + SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV, + const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, + unsigned Depth = 0) { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; - // Lexicographically compare. 
- for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } + if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS})) + return 0; + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast<SCEVTypes>(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); + const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + + SmallSet<std::pair<Value *, Value *>, 8> EqCache; + int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(), + Depth + 1); + if (X == 0) + EqCacheSCEV.insert({LHS, RHS}); + return X; + } - return 0; + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + + // Compare constant values. + const APInt &LA = LC->getAPInt(); + const APInt &RA = RC->getAPInt(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; } - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); - const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); - - // Lexicographically compare n-ary expressions. - unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), + RA->getOperand(i), Depth + 1); + if (X != 0) + return X; } + EqCacheSCEV.insert({LHS, RHS}); + return 0; + } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); - const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); + const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; - // Lexicographically compare udiv expressions. 
- long X = compare(LC->getLHS(), RC->getLHS()); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), + RC->getOperand(i), Depth + 1); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } + EqCacheSCEV.insert({LHS, RHS}); + return 0; + } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); - const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); + const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } + // Lexicographically compare udiv expressions. + int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), + Depth + 1); + if (X != 0) + return X; + X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), + Depth + 1); + if (X == 0) + EqCacheSCEV.insert({LHS, RHS}); + return X; + } - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); + const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + + // Compare cast expressions by operand. + int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), + RC->getOperand(), Depth + 1); + if (X == 0) + EqCacheSCEV.insert({LHS, RHS}); + return X; } -}; -} // end anonymous namespace + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); +} /// Given a list of SCEV objects, order them by their complexity, and group /// objects of the same complexity together by value. When this routine is @@ -635,17 +705,22 @@ public: static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop + + SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (SCEVComplexityCompare(LI)(RHS, LHS)) + if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. - std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + std::stable_sort(Ops.begin(), Ops.end(), + [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) { + return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0; + }); // Now that we are sorted by complexity, group elements of the same // complexity. Note that this is, at worst, N^2, but the vector is likely to @@ -2518,6 +2593,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (Idx < Ops.size()) { bool DeletedMul = false; while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { + if (Ops.size() > MulOpsInlineThreshold) + break; // If we have an mul, expand the mul operands onto the end of the operands // list. 
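
GroupByComplexity, reworked above, adapts the three-way CompareSCEVComplexity result to the boolean strict weak ordering std::stable_sort expects by testing for a negative result; stable_sort (rather than sort) matters because elements that compare as equally complex must keep their relative order for the later grouping step. A standalone sketch of that adaptation, with plain ints in place of SCEVs:

#include <algorithm>
#include <vector>

static int threeWayCompare(int L, int R) {
  return (L > R) - (L < R); // -1, 0 or +1, like CompareSCEVComplexity
}

static void sortByComplexity(std::vector<int> &Ops) {
  std::stable_sort(Ops.begin(), Ops.end(), [](int L, int R) {
    return threeWayCompare(L, R) < 0; // "less complex" sorts first
  });
}
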
Ops.erase(Ops.begin()+Idx); @@ -2970,9 +3047,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, } const SCEV * -ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, - const SmallVectorImpl<const SCEV *> &IndexExprs, - bool InBounds) { +ScalarEvolution::getGEPExpr(GEPOperator *GEP, + const SmallVectorImpl<const SCEV *> &IndexExprs) { + const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); @@ -2981,12 +3058,13 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, // flow and the no-overflow bits may not be valid for the expression in any // context. This can be fixed similarly to how these flags are handled for // adds. - SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW + : SCEV::FlagAnyWrap; const SCEV *TotalOffset = getZero(IntPtrTy); - // The address space is unimportant. The first thing we do on CurTy is getting + // The array size is unimportant. The first thing we do on CurTy is getting // its element type. - Type *CurTy = PointerType::getUnqual(PointeeType); + Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast<StructType>(CurTy)) { @@ -3311,71 +3389,23 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } - bool ScalarEvolution::checkValidity(const SCEV *S) const { - // Helper class working with SCEVTraversal to figure out if a SCEV contains - // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne - // is set iff if find such SCEVUnknown. - // - struct FindInvalidSCEVUnknown { - bool FindOne; - FindInvalidSCEVUnknown() { FindOne = false; } - bool follow(const SCEV *S) { - switch (static_cast<SCEVTypes>(S->getSCEVType())) { - case scConstant: - return false; - case scUnknown: - if (!cast<SCEVUnknown>(S)->getValue()) - FindOne = true; - return false; - default: - return true; - } - } - bool isDone() const { return FindOne; } - }; - - FindInvalidSCEVUnknown F; - SCEVTraversal<FindInvalidSCEVUnknown> ST(F); - ST.visitAll(S); + bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) { + auto *SU = dyn_cast<SCEVUnknown>(S); + return SU && SU->getValue() == nullptr; + }); - return !F.FindOne; -} - -namespace { -// Helper class working with SCEVTraversal to figure out if a SCEV contains -// a sub SCEV of scAddRecExpr type. FindInvalidSCEVUnknown::FoundOne is set -// iff if such sub scAddRecExpr type SCEV is found. 
-struct FindAddRecurrence { - bool FoundOne; - FindAddRecurrence() : FoundOne(false) {} - - bool follow(const SCEV *S) { - switch (static_cast<SCEVTypes>(S->getSCEVType())) { - case scAddRecExpr: - FoundOne = true; - case scConstant: - case scUnknown: - case scCouldNotCompute: - return false; - default: - return true; - } - } - bool isDone() const { return FoundOne; } -}; + return !ContainsNulls; } bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { - HasRecMapType::iterator I = HasRecMap.find_as(S); + HasRecMapType::iterator I = HasRecMap.find(S); if (I != HasRecMap.end()) return I->second; - FindAddRecurrence F; - SCEVTraversal<FindAddRecurrence> ST(F); - ST.visitAll(S); - HasRecMap.insert({S, F.FoundOne}); - return F.FoundOne; + bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>); + HasRecMap.insert({S, FoundAddRec}); + return FoundAddRec; } /// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. @@ -4210,7 +4240,9 @@ static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, } const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { - if (PN->getNumIncomingValues() == 2) { + auto IsReachable = + [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); }; + if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) { const Loop *L = LI.getLoopFor(PN->getParent()); // We don't want to break LCSSA, even in a SCEV expression tree. @@ -4286,7 +4318,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: std::swap(LHS, RHS); - // fall through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: // a >s b ? a+x : b+x -> smax(a, b)+x @@ -4309,7 +4341,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: std::swap(LHS, RHS); - // fall through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: // a >u b ? a+x : b+x -> umax(a, b)+x @@ -4374,9 +4406,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { SmallVector<const SCEV *, 4> IndexExprs; for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) IndexExprs.push_back(getSCEV(*Index)); - return getGEPExpr(GEP->getSourceElementType(), - getSCEV(GEP->getPointerOperand()), - IndexExprs, GEP->isInBounds()); + return getGEPExpr(GEP, IndexExprs); } uint32_t @@ -4654,19 +4684,18 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - ConstantRange ZExtMaxBECountRange = - MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1); + ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2); ConstantRange StepSRange = getSignedRange(Step); - ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1); + ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2); ConstantRange StartURange = getUnsignedRange(Start); ConstantRange EndURange = StartURange.add(MaxBECountRange.multiply(StepSRange)); // Check for unsigned overflow. 
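
The checkValidity and containsAddRecurrence rewrites above replace hand-rolled SCEVTraversal visitor structs with SCEVExprContains plus a predicate. A small sketch of the same idiom; the mentionsValue helper is invented for illustration and assumes the usual ScalarEvolution headers.

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

// Illustrative helper, not from the patch: does the SCEV tree rooted at Root
// mention the IR value V as a SCEVUnknown leaf?
static bool mentionsValue(const llvm::SCEV *Root, const llvm::Value *V) {
  return llvm::SCEVExprContains(Root, [V](const llvm::SCEV *S) {
    const auto *SU = llvm::dyn_cast<llvm::SCEVUnknown>(S);
    return SU && SU->getValue() == V;
  });
}
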
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2 + 1); - ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1); + ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2); + ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2); if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == ZExtEndURange) { APInt Min = APIntOps::umin(StartURange.getUnsignedMin(), @@ -4686,8 +4715,8 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, // Check for signed overflow. This must be done with ConstantRange // arithmetic because we could be called from within the ScalarEvolution // overflow checking code. - ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2 + 1); - ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1); + ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2); + ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2); if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == SExtEndSRange) { APInt Min = @@ -4951,17 +4980,33 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); } -bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) { - auto Itr = LoopHasNoAbnormalExits.find(L); - if (Itr == LoopHasNoAbnormalExits.end()) { - auto NoAbnormalExitInBB = [&](BasicBlock *BB) { - return all_of(*BB, [](Instruction &I) { - return isGuaranteedToTransferExecutionToSuccessor(&I); - }); +ScalarEvolution::LoopProperties +ScalarEvolution::getLoopProperties(const Loop *L) { + typedef ScalarEvolution::LoopProperties LoopProperties; + + auto Itr = LoopPropertiesCache.find(L); + if (Itr == LoopPropertiesCache.end()) { + auto HasSideEffects = [](Instruction *I) { + if (auto *SI = dyn_cast<StoreInst>(I)) + return !SI->isSimple(); + + return I->mayHaveSideEffects(); }; - auto InsertPair = LoopHasNoAbnormalExits.insert( - {L, all_of(L->getBlocks(), NoAbnormalExitInBB)}); + LoopProperties LP = {/* HasNoAbnormalExits */ true, + /*HasNoSideEffects*/ true}; + + for (auto *BB : L->getBlocks()) + for (auto &I : *BB) { + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + LP.HasNoAbnormalExits = false; + if (HasSideEffects(&I)) + LP.HasNoSideEffects = false; + if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects) + break; // We're already as pessimistic as we can get. + } + + auto InsertPair = LoopPropertiesCache.insert({L, LP}); assert(InsertPair.second && "We just checked!"); Itr = InsertPair.first; } @@ -5289,6 +5334,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // +static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { + if (!ExitCount) + return 0; + + ConstantInt *ExitConst = ExitCount->getValue(); + + // Guard against huge trip counts. + if (ExitConst->getValue().getActiveBits() > 32) + return 0; + + // In case of integer overflow, this returns 0, which is correct. 
+ return ((unsigned)ExitConst->getZExtValue()) + 1; +} + unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripCount(L, ExitingBB); @@ -5304,17 +5363,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, "Exiting block must actually branch out of the loop!"); const SCEVConstant *ExitCount = dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); - if (!ExitCount) - return 0; - - ConstantInt *ExitConst = ExitCount->getValue(); - - // Guard against huge trip counts. - if (ExitConst->getValue().getActiveBits() > 32) - return 0; + return getConstantTripCount(ExitCount); +} - // In case of integer overflow, this returns 0, which is correct. - return ((unsigned)ExitConst->getZExtValue()) + 1; +unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) { + const auto *MaxExitCount = + dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L)); + return getConstantTripCount(MaxExitCount); } unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) { @@ -5393,6 +5448,10 @@ const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getMax(this); } +bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { + return getBackedgeTakenInfo(L).isMaxOrZero(this); +} + /// Push PHI nodes in the header of the given loop onto the given Worklist. static void PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { @@ -5418,7 +5477,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { BackedgeTakenInfo Result = computeBackedgeTakenCount(L, /*AllowPredicates=*/true); - return PredicatedBackedgeTakenCounts.find(L)->second = Result; + return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } const ScalarEvolution::BackedgeTakenInfo & @@ -5493,7 +5552,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // recusive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. - return BackedgeTakenCounts.find(L)->second = Result; + return BackedgeTakenCounts.find(L)->second = std::move(Result); } void ScalarEvolution::forgetLoop(const Loop *L) { @@ -5537,7 +5596,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) { for (Loop *I : *L) forgetLoop(I); - LoopHasNoAbnormalExits.erase(L); + LoopPropertiesCache.erase(L); } void ScalarEvolution::forgetValue(Value *V) { @@ -5576,14 +5635,11 @@ void ScalarEvolution::forgetValue(Value *V) { /// caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact( - ScalarEvolution *SE, SCEVUnionPredicate *Preds) const { +ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE, + SCEVUnionPredicate *Preds) const { // If any exits were not computable, the loop is not computable. - if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); - - // We need exactly one computable exit. 
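
The new getConstantTripCount helper above encodes the usual relationship between an exit count and a trip count: the loop body runs one more time than the backedge is taken, and anything that does not fit in 32 bits is reported as 0, meaning unknown. As a worked example (not from the patch), a loop whose single exit test sits in the latch and whose body runs 16 times takes its backedge 15 times, so the reported trip count is 15 + 1 == 16; the wrap to 0 at UINT_MAX is deliberate, as the comment in the hunk notes. A minimal standalone version of that arithmetic:

#include "llvm/ADT/APInt.h"

// Sketch (invented name) of the trip-count arithmetic used above.
static unsigned tripCountFromExitCount(const llvm::APInt &ExitCount) {
  if (ExitCount.getActiveBits() > 32)
    return 0; // too large to report as a small constant trip count
  // Unsigned wrap to 0 on UINT_MAX is the intended "unknown" result.
  return (unsigned)ExitCount.getZExtValue() + 1;
}
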
- if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); - assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); + if (!isComplete() || ExitNotTaken.empty()) + return SE->getCouldNotCompute(); const SCEV *BECount = nullptr; for (auto &ENT : ExitNotTaken) { @@ -5593,10 +5649,10 @@ ScalarEvolution::BackedgeTakenInfo::getExact( BECount = ENT.ExactNotTaken; else if (BECount != ENT.ExactNotTaken) return SE->getCouldNotCompute(); - if (Preds && ENT.getPred()) - Preds->add(ENT.getPred()); + if (Preds && !ENT.hasAlwaysTruePredicate()) + Preds->add(ENT.Predicate.get()); - assert((Preds || ENT.hasAlwaysTruePred()) && + assert((Preds || ENT.hasAlwaysTruePredicate()) && "Predicate should be always true!"); } @@ -5609,7 +5665,7 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (auto &ENT : ExitNotTaken) - if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred()) + if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; return SE->getCouldNotCompute(); @@ -5618,21 +5674,29 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { - for (auto &ENT : ExitNotTaken) - if (!ENT.hasAlwaysTruePred()) - return SE->getCouldNotCompute(); + auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { + return !ENT.hasAlwaysTruePredicate(); + }; - return Max ? Max : SE->getCouldNotCompute(); + if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) + return SE->getCouldNotCompute(); + + return getMax(); +} + +bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const { + auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { + return !ENT.hasAlwaysTruePredicate(); + }; + return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue); } bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, ScalarEvolution *SE) const { - if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + if (getMax() && getMax() != SE->getCouldNotCompute() && + SE->hasOperand(getMax(), S)) return true; - if (!ExitNotTaken.ExitingBlock) - return false; - for (auto &ENT : ExitNotTaken) if (ENT.ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT.ExactNotTaken, S)) @@ -5644,62 +5708,31 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount) - : Max(MaxCount) { - - if (!Complete) - ExitNotTaken.setIncomplete(); - - unsigned NumExits = ExitCounts.size(); - if (NumExits == 0) return; - - ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock; - ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken; - - // Determine the number of ExitNotTakenExtras structures that we need. - unsigned ExtraInfoSize = 0; - if (NumExits > 1) - ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()), - ExitCounts.end(), [](EdgeInfo &Entry) { - return !Entry.Pred.isAlwaysTrue(); - }); - else if (!ExitCounts[0].Pred.isAlwaysTrue()) - ExtraInfoSize = 1; - - ExitNotTakenExtras *ENT = nullptr; - - // Allocate the ExitNotTakenExtras structures and initialize the first - // element (ExitNotTaken). 
- if (ExtraInfoSize > 0) { - ENT = new ExitNotTakenExtras[ExtraInfoSize]; - ExitNotTaken.ExtraInfo = &ENT[0]; - *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred); - } - - if (NumExits == 1) - return; - - assert(ENT && "ExitNotTakenExtras is NULL while having more than one exit"); - - auto &Exits = ExitNotTaken.ExtraInfo->Exits; - - // Handle the rare case of multiple computable exits. - for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) { - ExitNotTakenExtras *Ptr = nullptr; - if (!ExitCounts[i].Pred.isAlwaysTrue()) { - Ptr = &ENT[PredPos++]; - Ptr->Pred = std::move(ExitCounts[i].Pred); - } - - Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr); - } + SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> + &&ExitCounts, + bool Complete, const SCEV *MaxCount, bool MaxOrZero) + : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { + typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo; + ExitNotTaken.reserve(ExitCounts.size()); + std::transform( + ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), + [&](const EdgeExitInfo &EEI) { + BasicBlock *ExitBB = EEI.first; + const ExitLimit &EL = EEI.second; + if (EL.Predicates.empty()) + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); + + std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate); + for (auto *Pred : EL.Predicates) + Predicate->add(Pred); + + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); + }); } /// Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.ExitingBlock = nullptr; - ExitNotTaken.ExactNotTaken = nullptr; - delete[] ExitNotTaken.ExtraInfo; + ExitNotTaken.clear(); } /// Compute the number of times the backedge of the specified loop will execute. @@ -5709,11 +5742,14 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - SmallVector<EdgeInfo, 4> ExitCounts; + typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo; + + SmallVector<EdgeExitInfo, 4> ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; const SCEV *MayExitMaxBECount = nullptr; + bool MustExitMaxOrZero = false; // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. @@ -5722,17 +5758,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, BasicBlock *ExitBB = ExitingBlocks[i]; ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); - assert((AllowPredicates || EL.Pred.isAlwaysTrue()) && + assert((AllowPredicates || EL.Predicates.empty()) && "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. - if (EL.Exact == getCouldNotCompute()) + if (EL.ExactNotTaken == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred)); + ExitCounts.emplace_back(ExitBB, EL); // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. 
Partition the loop exits into two kinds: @@ -5740,29 +5776,35 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, // // If the exit dominates the loop latch, it is a LoopMustExit otherwise it // is a LoopMayExit. If any computable LoopMustExit is found, then - // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise, - // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is - // considered greater than any computable EL.Max. - if (EL.Max != getCouldNotCompute() && Latch && + // MaxBECount is the minimum EL.MaxNotTaken of computable + // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum + // EL.MaxNotTaken, where CouldNotCompute is considered greater than any + // computable EL.MaxNotTaken. + if (EL.MaxNotTaken != getCouldNotCompute() && Latch && DT.dominates(ExitBB, Latch)) { - if (!MustExitMaxBECount) - MustExitMaxBECount = EL.Max; - else { + if (!MustExitMaxBECount) { + MustExitMaxBECount = EL.MaxNotTaken; + MustExitMaxOrZero = EL.MaxOrZero; + } else { MustExitMaxBECount = - getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max); + getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken); } } else if (MayExitMaxBECount != getCouldNotCompute()) { - if (!MayExitMaxBECount || EL.Max == getCouldNotCompute()) - MayExitMaxBECount = EL.Max; + if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute()) + MayExitMaxBECount = EL.MaxNotTaken; else { MayExitMaxBECount = - getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max); + getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken); } } } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); - return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); + // The loop backedge will be taken the maximum or zero times if there's + // a single exit that must be taken the maximum or zero times. + bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1); + return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, + MaxBECount, MaxOrZero); } ScalarEvolution::ExitLimit @@ -5867,39 +5909,40 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. - if (EL0.Exact == getCouldNotCompute() || - EL1.Exact == getCouldNotCompute()) + if (EL0.ExactNotTaken == getCouldNotCompute() || + EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); - if (EL0.Max == getCouldNotCompute()) - MaxBECount = EL1.Max; - else if (EL1.Max == getCouldNotCompute()) - MaxBECount = EL0.Max; + BECount = + getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); + if (EL0.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL1.MaxNotTaken; + else if (EL1.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL0.MaxNotTaken; else - MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MaxBECount = + getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. 
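
In computeExitLimitFromCond above, when the exit branch tests a && b and either operand on its own may exit the loop, the exact counts combine with an unsigned minimum (the loop leaves at whichever condition fails first) and an unknown maximum on one side simply defers to the other. A standalone sketch of that combination rule, with an invented Limit type and std::optional standing in for the CouldNotCompute state:

#include <algorithm>
#include <cstdint>
#include <optional>

// Invented stand-in for ExitLimit: nullopt plays the role of CouldNotCompute.
struct Limit {
  std::optional<uint64_t> Exact;
  std::optional<uint64_t> Max;
};

static Limit combineAnd(const Limit &L, const Limit &R) {
  Limit Out;
  if (L.Exact && R.Exact)
    Out.Exact = std::min(*L.Exact, *R.Exact); // exit at the earlier failure
  if (!L.Max)
    Out.Max = R.Max;
  else if (!R.Max)
    Out.Max = L.Max;
  else
    Out.Max = std::min(*L.Max, *R.Max);
  return Out;
}
// e.g. combineAnd({{99}, {99}}, {{6}, {6}}).Exact yields 6.
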
assert(L->contains(FBB) && "Loop block has no successor in loop!"); - if (EL0.Max == EL1.Max) - MaxBECount = EL0.Max; - if (EL0.Exact == EL1.Exact) - BECount = EL0.Exact; + if (EL0.MaxNotTaken == EL1.MaxNotTaken) + MaxBECount = EL0.MaxNotTaken; + if (EL0.ExactNotTaken == EL1.ExactNotTaken) + BECount = EL0.ExactNotTaken; } - SCEVUnionPredicate NP; - NP.add(&EL0.Pred); - NP.add(&EL1.Pred); // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing - // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact - // to match, but for EL0.Max and EL1.Max to not. + // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and + // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken + // to not. if (isa<SCEVCouldNotCompute>(MaxBECount) && !isa<SCEVCouldNotCompute>(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, NP); + return ExitLimit(BECount, MaxBECount, false, + {&EL0.Predicates, &EL1.Predicates}); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. @@ -5915,31 +5958,31 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. - if (EL0.Exact == getCouldNotCompute() || - EL1.Exact == getCouldNotCompute()) + if (EL0.ExactNotTaken == getCouldNotCompute() || + EL1.ExactNotTaken == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); - if (EL0.Max == getCouldNotCompute()) - MaxBECount = EL1.Max; - else if (EL1.Max == getCouldNotCompute()) - MaxBECount = EL0.Max; + BECount = + getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); + if (EL0.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL1.MaxNotTaken; + else if (EL1.MaxNotTaken == getCouldNotCompute()) + MaxBECount = EL0.MaxNotTaken; else - MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); + MaxBECount = + getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. assert(L->contains(TBB) && "Loop block has no successor in loop!"); - if (EL0.Max == EL1.Max) - MaxBECount = EL0.Max; - if (EL0.Exact == EL1.Exact) - BECount = EL0.Exact; + if (EL0.MaxNotTaken == EL1.MaxNotTaken) + MaxBECount = EL0.MaxNotTaken; + if (EL0.ExactNotTaken == EL1.ExactNotTaken) + BECount = EL0.ExactNotTaken; } - SCEVUnionPredicate NP; - NP.add(&EL0.Pred); - NP.add(&EL1.Pred); - return ExitLimit(BECount, MaxBECount, NP); + return ExitLimit(BECount, MaxBECount, false, + {&EL0.Predicates, &EL1.Predicates}); } } @@ -6021,8 +6064,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) if (AddRec->getLoop() == L) { // Form the constant range. - ConstantRange CompRange( - ICmpInst::makeConstantRange(Cond, RHSC->getAPInt())); + ConstantRange CompRange = + ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; @@ -6226,7 +6269,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] // %iv.shifted = lshr i32 %iv, <positive constant> // - // Return true on a succesful match. 
Return the corresponding PHI node (%iv + // Return true on a successful match. Return the corresponding PHI node (%iv // above) in PNOut and the opcode of the shift operation in OpCodeOut. auto MatchShiftRecurrence = [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { @@ -6324,8 +6367,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - SCEVUnionPredicate P; - return ExitLimit(getCouldNotCompute(), UpperBound, P); + return ExitLimit(getCouldNotCompute(), UpperBound, false); } return getCouldNotCompute(); @@ -6995,20 +7037,21 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic // modulo (N / D). // - // (N / D) may need BW+1 bits in its representation. Hence, we'll use this - // bit width during computations. + // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent + // (N / D) in general. The inverse itself always fits into BW bits, though, + // so we immediately truncate it. APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D APInt Mod(BW + 1, 0); Mod.setBit(BW - Mult2); // Mod = N / D - APInt I = AD.multiplicativeInverse(Mod); + APInt I = AD.multiplicativeInverse(Mod).trunc(BW); // 4. Compute the minimum unsigned root of the equation: // I * (B / D) mod (N / D) - APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + // To simplify the computation, we factor out the divide by D: + // (I * B mod N) / D + APInt Result = (I * B).lshr(Mult2); - // The result is guaranteed to be less than 2^BW so we may truncate it to BW - // bits. - return SE.getConstant(Result.trunc(BW)); + return SE.getConstant(Result); } /// Find the roots of the quadratic equation for the given quadratic chrec @@ -7086,7 +7129,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // effectively V != 0. We know and take advantage of the fact that this // expression only being used in a comparison by zero context. - SCEVUnionPredicate P; + SmallPtrSet<const SCEVPredicate *, 4> Predicates; // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -7099,7 +7142,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. - AddRec = convertSCEVToAddRecWithPredicates(V, L, P); + AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates); if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); @@ -7121,7 +7164,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return ExitLimit(R1, R1, P); // We found a quadratic root! + // We found a quadratic root! 
+ return ExitLimit(R1, R1, false, Predicates); } } return getCouldNotCompute(); @@ -7168,17 +7212,25 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { - ConstantRange CR = getUnsignedRange(Start); - const SCEV *MaxBECount; - if (!CountDown && CR.getUnsignedMin().isMinValue()) - // When counting up, the worst starting value is 1, not 0. - MaxBECount = CR.getUnsignedMax().isMinValue() - ? getConstant(APInt::getMinValue(CR.getBitWidth())) - : getConstant(APInt::getMaxValue(CR.getBitWidth())); - else - MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() - : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount, P); + APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax(); + + // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, + // we end up with a loop whose backedge-taken count is n - 1. Detect this + // case, and see if we can improve the bound. + // + // Explicitly handling this here is necessary because getUnsignedRange + // isn't context-sensitive; it doesn't know that we only care about the + // range inside the loop. + const SCEV *Zero = getZero(Distance->getType()); + const SCEV *One = getOne(Distance->getType()); + const SCEV *DistancePlusOne = getAddExpr(Distance, One); + if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) { + // If Distance + 1 doesn't overflow, we can compute the maximum distance + // as "unsigned_max(Distance + 1) - 1". + ConstantRange CR = getUnsignedRange(DistancePlusOne); + MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1); + } + return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates); } // As a special case, handle the instance where Step is a positive power of @@ -7233,7 +7285,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, const SCEV *Limit = getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); - return ExitLimit(Limit, Limit, P); + return ExitLimit(Limit, Limit, false, Predicates); } } @@ -7246,14 +7298,14 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, loopHasNoAbnormalExits(AddRec->getLoop())) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact, P); + return ExitLimit(Exact, Exact, false, Predicates); } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) { const SCEV *E = SolveLinEquationWithOverflow( StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); - return ExitLimit(E, E, P); + return ExitLimit(E, E, false, Predicates); } return getCouldNotCompute(); } @@ -7365,149 +7417,77 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // cases, and canonicalize *-or-equal comparisons to regular comparisons. if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { const APInt &RA = RC->getAPInt(); - switch (Pred) { - default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. 
- if (!RA) - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS)) - if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0))) - if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && - ME->getOperand(0)->isAllOnesValue()) { - RHS = AE->getOperand(1); - LHS = ME->getOperand(1); - Changed = true; - } - break; - case ICmpInst::ICMP_UGE: - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - Changed = true; - break; - } - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; - Changed = true; - break; - } - if (RA.isMinValue()) goto trivially_true; - Pred = ICmpInst::ICMP_UGT; - RHS = getConstant(RA - 1); - Changed = true; - break; - case ICmpInst::ICMP_ULE: - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - Changed = true; - break; - } - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_EQ; - Changed = true; - break; - } - if (RA.isMaxValue()) goto trivially_true; + bool SimplifiedByConstantRange = false; - Pred = ICmpInst::ICMP_ULT; - RHS = getConstant(RA + 1); - Changed = true; - break; - case ICmpInst::ICMP_SGE: - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - Changed = true; - break; - } - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - Changed = true; - break; + if (!ICmpInst::isEquality(Pred)) { + ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA); + if (ExactCR.isFullSet()) + goto trivially_true; + else if (ExactCR.isEmptySet()) + goto trivially_false; + + APInt NewRHS; + CmpInst::Predicate NewPred; + if (ExactCR.getEquivalentICmp(NewPred, NewRHS) && + ICmpInst::isEquality(NewPred)) { + // We were able to convert an inequality to an equality. + Pred = NewPred; + RHS = getConstant(NewRHS); + Changed = SimplifiedByConstantRange = true; } - if (RA.isMinSignedValue()) goto trivially_true; + } - Pred = ICmpInst::ICMP_SGT; - RHS = getConstant(RA - 1); - Changed = true; - break; - case ICmpInst::ICMP_SLE: - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - Changed = true; + if (!SimplifiedByConstantRange) { + switch (Pred) { + default: break; - } - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - Changed = true; + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. + if (!RA) + if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS)) + if (const SCEVMulExpr *ME = + dyn_cast<SCEVMulExpr>(AE->getOperand(0))) + if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && + ME->getOperand(0)->isAllOnesValue()) { + RHS = AE->getOperand(1); + LHS = ME->getOperand(1); + Changed = true; + } break; - } - if (RA.isMaxSignedValue()) goto trivially_true; - Pred = ICmpInst::ICMP_SLT; - RHS = getConstant(RA + 1); - Changed = true; - break; - case ICmpInst::ICMP_UGT: - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_NE; + + // The "Should have been caught earlier!" messages refer to the fact + // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above + // should have fired on the corresponding cases, and canonicalized the + // check to trivially_true or trivially_false. 
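
The constant-range rewrite of SimplifyICmpOperands above folds a whole family of special cases into two questions about the exact region of values that satisfy the compare: an empty region means the compare is trivially false, a full region means trivially true, and a region containing a single value (for example x u< 1, whose only member is 0) is the kind of case the getEquivalentICmp path can turn into an equality such as x == 0. A standalone check of the trivial cases, illustrative only and assuming the usual ConstantRange headers:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"

// Sketch: the empty/full exact regions that drive trivially_false and
// trivially_true in the hunk above.
static bool exactRegionExamples() {
  llvm::APInt Zero(32, 0);
  // "x u< 0" has no solutions, so its exact region is the empty set.
  bool TriviallyFalse =
      llvm::ConstantRange::makeExactICmpRegion(llvm::ICmpInst::ICMP_ULT, Zero)
          .isEmptySet();
  // "x u>= 0" always holds, so its exact region is the full set.
  bool TriviallyTrue =
      llvm::ConstantRange::makeExactICmpRegion(llvm::ICmpInst::ICMP_UGE, Zero)
          .isFullSet();
  return TriviallyFalse && TriviallyTrue; // both are expected to hold
}
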
+ + case ICmpInst::ICMP_UGE: + assert(!RA.isMinValue() && "Should have been caught earlier!"); + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); Changed = true; break; - } - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; + case ICmpInst::ICMP_ULE: + assert(!RA.isMaxValue() && "Should have been caught earlier!"); + Pred = ICmpInst::ICMP_ULT; RHS = getConstant(RA + 1); Changed = true; break; - } - if (RA.isMaxValue()) goto trivially_false; - break; - case ICmpInst::ICMP_ULT: - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - Changed = true; - break; - } - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_EQ; + case ICmpInst::ICMP_SGE: + assert(!RA.isMinSignedValue() && "Should have been caught earlier!"); + Pred = ICmpInst::ICMP_SGT; RHS = getConstant(RA - 1); Changed = true; break; - } - if (RA.isMinValue()) goto trivially_false; - break; - case ICmpInst::ICMP_SGT: - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - Changed = true; - break; - } - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; + case ICmpInst::ICMP_SLE: + assert(!RA.isMaxSignedValue() && "Should have been caught earlier!"); + Pred = ICmpInst::ICMP_SLT; RHS = getConstant(RA + 1); Changed = true; break; } - if (RA.isMaxSignedValue()) goto trivially_false; - break; - case ICmpInst::ICMP_SLT: - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - Changed = true; - break; - } - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA - 1); - Changed = true; - break; - } - if (RA.isMinSignedValue()) goto trivially_false; - break; } } @@ -8067,34 +8047,16 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } -namespace { -/// RAII wrapper to prevent recursive application of isImpliedCond. -/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are -/// currently evaluating isImpliedCond. -struct MarkPendingLoopPredicate { - Value *Cond; - DenseSet<Value*> &LoopPreds; - bool Pending; - - MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP) - : Cond(C), LoopPreds(LP) { - Pending = !LoopPreds.insert(Cond).second; - } - ~MarkPendingLoopPredicate() { - if (!Pending) - LoopPreds.erase(Cond); - } -}; -} // end anonymous namespace - bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Value *FoundCondValue, bool Inverse) { - MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates); - if (Mark.Pending) + if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; + auto ClearOnExit = + make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); }); + // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { @@ -8279,9 +8241,8 @@ bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, return true; } -bool ScalarEvolution::computeConstantDifference(const SCEV *Less, - const SCEV *More, - APInt &C) { +Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, + const SCEV *Less) { // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). @@ -8290,15 +8251,15 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less, const auto *MAR = cast<SCEVAddRecExpr>(More); if (LAR->getLoop() != MAR->getLoop()) - return false; + return None; // We look at affine expressions only; not for correctness but to keep // getStepRecurrence cheap. 
if (!LAR->isAffine() || !MAR->isAffine()) - return false; + return None; if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) - return false; + return None; Less = LAR->getStart(); More = MAR->getStart(); @@ -8309,27 +8270,22 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less, if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) { const auto &M = cast<SCEVConstant>(More)->getAPInt(); const auto &L = cast<SCEVConstant>(Less)->getAPInt(); - C = M - L; - return true; + return M - L; } const SCEV *L, *R; SCEV::NoWrapFlags Flags; if (splitBinaryAdd(Less, L, R, Flags)) if (const auto *LC = dyn_cast<SCEVConstant>(L)) - if (R == More) { - C = -(LC->getAPInt()); - return true; - } + if (R == More) + return -(LC->getAPInt()); if (splitBinaryAdd(More, L, R, Flags)) if (const auto *LC = dyn_cast<SCEVConstant>(L)) - if (R == Less) { - C = LC->getAPInt(); - return true; - } + if (R == Less) + return LC->getAPInt(); - return false; + return None; } bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( @@ -8386,22 +8342,21 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + // C)". - APInt LDiff, RDiff; - if (!computeConstantDifference(FoundLHS, LHS, LDiff) || - !computeConstantDifference(FoundRHS, RHS, RDiff) || - LDiff != RDiff) + Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS); + Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS); + if (!LDiff || !RDiff || *LDiff != *RDiff) return false; - if (LDiff == 0) + if (LDiff->isMinValue()) return true; APInt FoundRHSLimit; if (Pred == CmpInst::ICMP_ULT) { - FoundRHSLimit = -RDiff; + FoundRHSLimit = -(*RDiff); } else { assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); - FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff; + FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff; } // Try to prove (1) or (2), as needed. @@ -8511,7 +8466,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); - // fall through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLE: return // min(A, ...) <= A @@ -8521,7 +8476,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); - // fall through + LLVM_FALLTHROUGH; case ICmpInst::ICMP_ULE: return // min(A, ...) <= A @@ -8592,9 +8547,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, // reduce the compile time impact of this optimization. 
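
computeConstantDifference, reshaped above, now returns Optional<APInt> instead of filling an out-parameter: None means no constant distance between the two SCEVs could be established, while a present value is the difference itself (for instance, comparing (%x + 42) against %x yields 42). Callers such as isImpliedCondOperandsViaNoOverflow then proceed only when both differences exist and agree. A tiny sketch of that calling convention with invented names:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Optional.h"

// Illustrative only: proceed with an implication proof only when both sides
// are offset from their "Found" counterparts by the same known constant.
static bool sameConstantOffset(llvm::Optional<llvm::APInt> LDiff,
                               llvm::Optional<llvm::APInt> RDiff) {
  return LDiff && RDiff && *LDiff == *RDiff;
}
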
return false; - const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS); - if (!AddLHS || AddLHS->getOperand(1) != FoundLHS || - !isa<SCEVConstant>(AddLHS->getOperand(0))) + Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS); + if (!Addend) return false; APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt(); @@ -8604,10 +8558,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, ConstantRange FoundLHSRange = ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS); - // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range - // for `LHS`: - APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt(); - ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend)); + // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`: + ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend)); // We can also compute the range of values for `LHS` that satisfy the // consequent, "`LHS` `Pred` `RHS`": @@ -8622,6 +8574,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap) { + assert(isKnownPositive(Stride) && "Positive stride expected!"); + if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); @@ -8684,17 +8638,21 @@ ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { - SCEVUnionPredicate P; + SmallPtrSet<const SCEVPredicate *, 4> Predicates; // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); - if (!IV && AllowPredicates) + bool PredicatedIV = false; + + if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. - IV = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); + PredicatedIV = true; + } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8705,61 +8663,144 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const SCEV *Stride = IV->getStepRecurrence(*this); - // Avoid negative or zero stride values - if (!isKnownPositive(Stride)) - return getCouldNotCompute(); + bool PositiveStride = isKnownPositive(Stride); - // Avoid proven overflow cases: this will ensure that the backedge taken count - // will not generate any unsigned overflow. Relaxed no-overflow conditions - // exploit NoWrapFlags, allowing to optimize in presence of undefined - // behaviors like the case of C language. - if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + // Avoid negative or zero stride values. + if (!PositiveStride) { + // We can compute the correct backedge taken count for loops with unknown + // strides if we can prove that the loop is not an infinite loop with side + // effects. 
Here's the loop structure we are trying to handle - + // + // i = start + // do { + // A[i] = i; + // i += s; + // } while (i < end); + // + // The backedge taken count for such loops is evaluated as - + // (max(end, start + stride) - start - 1) /u stride + // + // The additional preconditions that we need to check to prove correctness + // of the above formula is as follows - + // + // a) IV is either nuw or nsw depending upon signedness (indicated by the + // NoWrap flag). + // b) loop is single exit with no side effects. + // + // + // Precondition a) implies that if the stride is negative, this is a single + // trip loop. The backedge taken count formula reduces to zero in this case. + // + // Precondition b) implies that the unknown stride cannot be zero otherwise + // we have UB. + // + // The positive stride case is the same as isKnownPositive(Stride) returning + // true (original behavior of the function). + // + // We want to make sure that the stride is truly unknown as there are edge + // cases where ScalarEvolution propagates no wrap flags to the + // post-increment/decrement IV even though the increment/decrement operation + // itself is wrapping. The computed backedge taken count may be wrong in + // such cases. This is prevented by checking that the stride is not known to + // be either positive or non-positive. For example, no wrap flags are + // propagated to the post-increment IV of this loop with a trip count of 2 - + // + // unsigned char i; + // for(i=127; i<128; i+=129) + // A[i] = i; + // + if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || + !loopHasNoSideEffects(L)) + return getCouldNotCompute(); + + } else if (!Stride->isOne() && + doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + // Avoid proven overflow cases: this will ensure that the backedge taken + // count will not generate any unsigned overflow. Relaxed no-overflow + // conditions exploit NoWrapFlags, allowing to optimize in presence of + // undefined behaviors like the case of C language. return getCouldNotCompute(); ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + // If the backedge is taken at least once, then it will be taken + // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start + // is the LHS value of the less-than comparison the first time it is evaluated + // and End is the RHS. + const SCEV *BECountIfBackedgeTaken = + computeBECount(getMinusSCEV(End, Start), Stride, false); + // If the loop entry is guarded by the result of the backedge test of the + // first loop iteration, then we know the backedge will be taken at least + // once and so the backedge taken count is as above. If not then we use the + // expression (max(End,Start)-Start)/Stride to describe the backedge count, + // as if the backedge is taken at least once max(End,Start) is End and so the + // result is as above, and if not max(End,Start) is Start so we get a backedge + // count of zero. + const SCEV *BECount; + if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + BECount = BECountIfBackedgeTaken; + else { End = IsSigned ? 
getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); + BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + } - const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + const SCEV *MaxBECount; + bool MaxOrZero = false; + if (isa<SCEVConstant>(BECount)) + MaxBECount = BECount; + else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) { + // If we know exactly how many times the backedge will be taken if it's + // taken at least once, then the backedge count will either be that or + // zero. + MaxBECount = BECountIfBackedgeTaken; + MaxOrZero = true; + } else { + // Calculate the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() + : getUnsignedRange(Start).getUnsignedMin(); - APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() - : getUnsignedRange(Start).getUnsignedMin(); + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() - : getUnsignedRange(Stride).getUnsignedMin(); + APInt StrideForMaxBECount; - unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) - : APInt::getMaxValue(BitWidth) - (MinStride - 1); + if (PositiveStride) + StrideForMaxBECount = + IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + else + // Using a stride of 1 is safe when computing max backedge taken count for + // a loop with unknown stride. + StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); - // Although End can be a MAX expression we estimate MaxEnd considering only - // the case End = RHS. This is safe because in the other case (End - Start) - // is zero, leading to a zero maximum backedge taken count. - APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) - : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + APInt Limit = + IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) + : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS. This is safe because in the other case (End - Start) + // is zero, leading to a zero maximum backedge taken count. + APInt MaxEnd = + IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) + : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); - const SCEV *MaxBECount; - if (isa<SCEVConstant>(BECount)) - MaxBECount = BECount; - else MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), - getConstant(MinStride), false); + getConstant(StrideForMaxBECount), false); + } if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, P); + return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } ScalarEvolution::ExitLimit ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { - SCEVUnionPredicate P; + SmallPtrSet<const SCEVPredicate *, 4> Predicates; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); @@ -8769,7 +8810,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. 
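As a quick sanity check of the count formula quoted in the comment above, (max(end, start + stride) - start - 1) /u stride, the standalone program below compares it with a literal simulation of the do/while loop for a couple of small unsigned examples (arbitrary values, not test cases from the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Backedge-taken count predicted by the formula in the comment above.
// Assumes Stride != 0 and that Start + Stride does not wrap.
static uint64_t predictedBackedges(uint64_t Start, uint64_t End, uint64_t Stride) {
  return (std::max(End, Start + Stride) - Start - 1) / Stride;
}

// Count backedges taken by:  i = Start; do { ... i += Stride; } while (i < End);
static uint64_t simulatedBackedges(uint64_t Start, uint64_t End, uint64_t Stride) {
  uint64_t Taken = 0, I = Start;
  do {
    I += Stride;
    if (I < End)
      ++Taken; // the exit test succeeded, so one more backedge is taken
  } while (I < End);
  return Taken;
}

int main() {
  // Start already past End: the body still runs once, but zero backedges.
  assert(predictedBackedges(10, 5, 3) == 0 && simulatedBackedges(10, 5, 3) == 0);
  // i = 0, 3, 6, 9: the backedge is taken three times before i reaches 12.
  assert(predictedBackedges(0, 10, 3) == 3 && simulatedBackedges(0, 10, 3) == 3);
  return 0;
}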
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P); + IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) @@ -8829,7 +8870,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, P); + return ExitLimit(BECount, MaxBECount, false, Predicates); } const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, @@ -8901,9 +8942,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, // Range.getUpper() is crossed. SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), - // getNoWrapFlags(FlagNW) - FlagAnyWrap); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap); // Next, solve the constructed addrec if (auto Roots = @@ -8947,38 +8986,15 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, return SE.getCouldNotCompute(); } -namespace { -struct FindUndefs { - bool Found; - FindUndefs() : Found(false) {} - - bool follow(const SCEV *S) { - if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) { - if (isa<UndefValue>(C->getValue())) - Found = true; - } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { - if (isa<UndefValue>(C->getValue())) - Found = true; - } - - // Keep looking if we haven't found it yet. - return !Found; - } - bool isDone() const { - // Stop recursion if we have found an undef. - return Found; - } -}; -} - // Return true when S contains at least an undef value. -static inline bool -containsUndefs(const SCEV *S) { - FindUndefs F; - SCEVTraversal<FindUndefs> ST(F); - ST.visitAll(S); - - return F.Found; +static inline bool containsUndefs(const SCEV *S) { + return SCEVExprContains(S, [](const SCEV *S) { + if (const auto *SU = dyn_cast<SCEVUnknown>(S)) + return isa<UndefValue>(SU->getValue()); + else if (const auto *SC = dyn_cast<SCEVConstant>(S)) + return isa<UndefValue>(SC->getValue()); + return false; + }); } namespace { @@ -9006,7 +9022,8 @@ struct SCEVCollectTerms { : Terms(T) {} bool follow(const SCEV *S) { - if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) { + if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) || + isa<SCEVSignExtendExpr>(S)) { if (!containsUndefs(S)) Terms.push_back(S); @@ -9158,10 +9175,9 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, } // Remove all SCEVConstants. - Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) { - return isa<SCEVConstant>(E); - }), - Terms.end()); + Terms.erase( + remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }), + Terms.end()); if (Terms.size() > 0) if (!findArrayDimensionsRec(SE, Terms, Sizes)) @@ -9171,40 +9187,11 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, return true; } -// Returns true when S contains at least a SCEVUnknown parameter. -static inline bool -containsParameters(const SCEV *S) { - struct FindParameter { - bool FoundParameter; - FindParameter() : FoundParameter(false) {} - - bool follow(const SCEV *S) { - if (isa<SCEVUnknown>(S)) { - FoundParameter = true; - // Stop recursion: we found a parameter. - return false; - } - // Keep looking. - return true; - } - bool isDone() const { - // Stop recursion if we have found a parameter. 
- return FoundParameter; - } - }; - - FindParameter F; - SCEVTraversal<FindParameter> ST(F); - ST.visitAll(S); - - return F.FoundParameter; -} // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. -static inline bool -containsParameters(SmallVectorImpl<const SCEV *> &Terms) { +static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { for (const SCEV *T : Terms) - if (containsParameters(T)) + if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>)) return true; return false; } @@ -9535,6 +9522,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), + PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -9543,6 +9531,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), LoopDispositions(std::move(Arg.LoopDispositions)), + LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), UnsignedRanges(std::move(Arg.UnsignedRanges)), SignedRanges(std::move(Arg.SignedRanges)), @@ -9611,6 +9600,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (SE->isBackedgeTakenCountMaxOrZero(L)) + OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable max backedge-taken count. "; } @@ -9871,8 +9862,10 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); if (!DT.dominates(AR->getLoop()->getHeader(), BB)) return DoesNotDominateBlock; + + // Fall through into SCEVNAryExpr handling. + LLVM_FALLTHROUGH; } - // FALL THROUGH into SCEVNAryExpr handling. case scAddExpr: case scMulExpr: case scUMaxExpr: @@ -9925,24 +9918,7 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { } bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { - // Search for a SCEV expression node within an expression tree. - // Implements SCEVTraversal::Visitor. - struct SCEVSearch { - const SCEV *Node; - bool IsFound; - - SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} - - bool follow(const SCEV *S) { - IsFound |= (S == Node); - return !IsFound; - } - bool isDone() const { return IsFound; } - }; - - SCEVSearch Search(Op); - visitAll(S, Search); - return Search.IsFound; + return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { @@ -10050,10 +10026,22 @@ void ScalarEvolution::verify() const { // TODO: Verify more things. } -char ScalarEvolutionAnalysis::PassID; +bool ScalarEvolution::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Invalidate the ScalarEvolution object whenever it isn't preserved or one + // of its dependencies is invalidated. 
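containsUndefs, containsParameters and hasOperand above all collapse their bespoke SCEVTraversal visitor structs into SCEVExprContains plus a lambda: one reusable early-exit walk, with the per-query test supplied by the caller. A generic standalone sketch of that shape (Expr is a stand-in type, not a SCEV):

#include <vector>

// Toy expression node standing in for a SCEV: a kind tag plus operands.
struct Expr {
  int Kind;
  std::vector<const Expr *> Operands;
};

// Depth-first search that stops as soon as Pred matches, mirroring the shape
// of SCEVExprContains: one walker, the per-query logic passed as a predicate.
template <typename PredTy>
static bool exprContains(const Expr *Root, PredTy Pred) {
  if (Pred(Root))
    return true;
  for (const Expr *Op : Root->Operands)
    if (exprContains(Op, Pred))
      return true;
  return false;
}

// Usage in the style of the rewritten helpers:
//   exprContains(S, [](const Expr *E) { return E->Kind == 0; /* "unknown" */ });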
+ auto PAC = PA.getChecker<ScalarEvolutionAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || + Inv.invalidate<AssumptionAnalysis>(F, PA) || + Inv.invalidate<DominatorTreeAnalysis>(F, PA) || + Inv.invalidate<LoopAnalysis>(F, PA); +} + +AnalysisKey ScalarEvolutionAnalysis::Key; ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), AM.getResult<DominatorTreeAnalysis>(F), @@ -10061,7 +10049,7 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, } PreservedAnalyses -ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> &AM) { +ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); return PreservedAnalyses::all(); } @@ -10148,25 +10136,34 @@ namespace { class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { public: - // Rewrites \p S in the context of a loop L and the predicate A. - // If Assume is true, rewrite is free to add further predicates to A - // such that the result will be an AddRecExpr. + /// Rewrites \p S in the context of a loop L and the SCEV predication + /// infrastructure. + /// + /// If \p Pred is non-null, the SCEV expression is rewritten to respect the + /// equivalences present in \p Pred. + /// + /// If \p NewPreds is non-null, rewrite is free to add further predicates to + /// \p NewPreds such that the result will be an AddRecExpr. static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, - SCEVUnionPredicate &A, bool Assume) { - SCEVPredicateRewriter Rewriter(L, SE, A, Assume); + SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, + SCEVUnionPredicate *Pred) { + SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred); return Rewriter.visit(S); } SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, - SCEVUnionPredicate &P, bool Assume) - : SCEVRewriteVisitor(SE), P(P), L(L), Assume(Assume) {} + SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, + SCEVUnionPredicate *Pred) + : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { - auto ExprPreds = P.getPredicatesForExpr(Expr); - for (auto *Pred : ExprPreds) - if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred)) - if (IPred->getLHS() == Expr) - return IPred->getRHS(); + if (Pred) { + auto ExprPreds = Pred->getPredicatesForExpr(Expr); + for (auto *Pred : ExprPreds) + if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred)) + if (IPred->getLHS() == Expr) + return IPred->getRHS(); + } return Expr; } @@ -10207,32 +10204,31 @@ private: bool addOverflowAssumption(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { auto *A = SE.getWrapPredicate(AR, AddedFlags); - if (!Assume) { + if (!NewPreds) { // Check if we've already made this assumption. 
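ScalarEvolution::invalidate above follows the new pass manager's protocol: a cached result survives only if it was preserved and none of the analyses it keeps pointers into were invalidated. A stripped-down sketch of an analysis obeying the same protocol (DemoAnalysis is invented for illustration; DominatorTreeAnalysis is the only real analysis named):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Hypothetical analysis whose result caches a pointer into the dominator tree.
class DemoAnalysis : public AnalysisInfoMixin<DemoAnalysis> {
  friend AnalysisInfoMixin<DemoAnalysis>;
  static AnalysisKey Key;

public:
  struct Result {
    DominatorTree *DT = nullptr;

    bool invalidate(Function &F, const PreservedAnalyses &PA,
                    FunctionAnalysisManager::Invalidator &Inv) {
      // Invalid when we were not preserved, or when the dominator tree we
      // point into has itself been invalidated -- the same shape as above.
      auto PAC = PA.getChecker<DemoAnalysis>();
      return !(PAC.preserved() ||
               PAC.preservedSet<AllAnalysesOn<Function>>()) ||
             Inv.invalidate<DominatorTreeAnalysis>(F, PA);
    }
  };

  Result run(Function &F, FunctionAnalysisManager &AM) {
    Result R;
    R.DT = &AM.getResult<DominatorTreeAnalysis>(F);
    return R;
  }
};

AnalysisKey DemoAnalysis::Key;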
- if (P.implies(A)) - return true; - return false; + return Pred && Pred->implies(A); } - P.add(A); + NewPreds->insert(A); return true; } - SCEVUnionPredicate &P; + SmallPtrSetImpl<const SCEVPredicate *> *NewPreds; + SCEVUnionPredicate *Pred; const Loop *L; - bool Assume; }; } // end anonymous namespace const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, SCEVUnionPredicate &Preds) { - return SCEVPredicateRewriter::rewrite(S, L, *this, Preds, false); + return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds); } -const SCEVAddRecExpr * -ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, - SCEVUnionPredicate &Preds) { - SCEVUnionPredicate TransformPreds; - S = SCEVPredicateRewriter::rewrite(S, L, *this, TransformPreds, true); +const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( + const SCEV *S, const Loop *L, + SmallPtrSetImpl<const SCEVPredicate *> &Preds) { + + SmallPtrSet<const SCEVPredicate *, 4> TransformPreds; + S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr); auto *AddRec = dyn_cast<SCEVAddRecExpr>(S); if (!AddRec) @@ -10240,7 +10236,9 @@ ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, // Since the transformation was successful, we can now transfer the SCEV // predicates. - Preds.add(&TransformPreds); + for (auto *P : TransformPreds) + Preds.insert(P); + return AddRec; } @@ -10393,7 +10391,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { return Entry.second; // We found an entry but it's stale. Rewrite the stale entry - // acording to the current predicate. + // according to the current predicate. if (Entry.second) Expr = Entry.second; @@ -10467,11 +10465,15 @@ bool PredicatedScalarEvolution::hasNoOverflow( const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { const SCEV *Expr = this->getSCEV(V); - auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, Preds); + SmallPtrSet<const SCEVPredicate *, 4> NewPreds; + auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); if (!New) return nullptr; + for (auto *P : NewPreds) + Preds.add(P); + updateGeneration(); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 61fb411..7bea994 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -110,9 +110,9 @@ Value *SCEVAAResult::GetBaseValue(const SCEV *S) { return nullptr; } -char SCEVAA::PassID; +AnalysisKey SCEVAA::Key; -SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> &AM) { +SCEVAAResult SCEVAA::run(Function &F, FunctionAnalysisManager &AM) { return SCEVAAResult(AM.getResult<ScalarEvolutionAnalysis>(F)); } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 6036dcc..d15a7db 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -549,9 +549,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V)) break; - bool AnyIndexNotLoopInvariant = - std::any_of(GepIndices.begin(), GepIndices.end(), - [L](Value *Op) { return !L->isLoopInvariant(Op); }); + bool AnyIndexNotLoopInvariant = any_of( + GepIndices, 
[L](Value *Op) { return !L->isLoopInvariant(Op); }); if (AnyIndexNotLoopInvariant) break; @@ -1183,11 +1182,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, PostIncLoopSet SavedPostIncLoops = PostIncLoops; PostIncLoops.clear(); - // Expand code for the start value. - Value *StartV = - expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front()); + // Expand code for the start value into the loop preheader. + assert(L->getLoopPreheader() && + "Can't expand add recurrences without a loop preheader!"); + Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getLoopPreheader()->getTerminator()); - // StartV must be hoisted into L's preheader to dominate the new phi. + // StartV must have been be inserted into L's preheader to dominate the new + // phi. assert(!isa<Instruction>(StartV) || SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(), L->getHeader())); @@ -1962,7 +1964,7 @@ bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, const Instruction *At, SmallPtrSetImpl<const SCEV *> &Processed) { - // If we can find an existing value for this scev avaliable at the point "At" + // If we can find an existing value for this scev available at the point "At" // then consider the expression cheap. if (At && getRelatedExistingExpansion(S, At, L)) return false; diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 82e65a1..833c6e0 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -127,9 +127,8 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, return AAResultBase::getModRefInfo(CS1, CS2); } -void ScopedNoAliasAAResult::collectMDInDomain( - const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl<const MDNode *> &Nodes) const { +static void collectMDInDomain(const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl<const MDNode *> &Nodes) { for (const MDOperand &MDOp : List->operands()) if (const MDNode *MD = dyn_cast<MDNode>(MDOp)) if (AliasScopeNode(MD).getDomain() == Domain) @@ -151,12 +150,14 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, // We alias unless, for some domain, the set of noalias scopes in that domain // is a superset of the set of alias scopes in that domain. for (const MDNode *Domain : Domains) { - SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes; - collectMDInDomain(NoAlias, Domain, NANodes); + SmallPtrSet<const MDNode *, 16> ScopeNodes; collectMDInDomain(Scopes, Domain, ScopeNodes); - if (!ScopeNodes.size()) + if (ScopeNodes.empty()) continue; + SmallPtrSet<const MDNode *, 16> NANodes; + collectMDInDomain(NoAlias, Domain, NANodes); + // To not alias, all of the nodes in ScopeNodes must be in NANodes. 
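The expander hunk above is one of several in this diff that replace std::any_of/std::all_of with explicit iterator pairs by the range-based llvm::any_of and friends from STLExtras.h. A two-line illustration (the container and predicate are arbitrary):

#include "llvm/ADT/STLExtras.h"
#include <vector>

// llvm::any_of takes the whole range, so the container is named only once.
static bool hasNegative(const std::vector<int> &Values) {
  return llvm::any_of(Values, [](int V) { return V < 0; });
}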
bool FoundAll = true; for (const MDNode *SMD : ScopeNodes) @@ -172,10 +173,10 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, return true; } -char ScopedNoAliasAA::PassID; +AnalysisKey ScopedNoAliasAA::Key; ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, - AnalysisManager<Function> &AM) { + FunctionAnalysisManager &AM) { return ScopedNoAliasAAResult(); } diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h index fd3a241..772df17 100644 --- a/contrib/llvm/lib/Analysis/StratifiedSets.h +++ b/contrib/llvm/lib/Analysis/StratifiedSets.h @@ -85,17 +85,8 @@ struct StratifiedLink { template <typename T> class StratifiedSets { public: StratifiedSets() = default; - - // TODO: Figure out how to make MSVC not call the copy ctor here, and delete - // it. - - // Can't default these due to compile errors in MSVC2013 - StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); } - StratifiedSets &operator=(StratifiedSets &&Other) { - Values = std::move(Other.Values); - Links = std::move(Other.Links); - return *this; - } + StratifiedSets(StratifiedSets &&) = default; + StratifiedSets &operator=(StratifiedSets &&) = default; StratifiedSets(DenseMap<T, StratifiedInfo> Map, std::vector<StratifiedLink> Links) diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index 93d537a..112118a 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -23,9 +23,10 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), - clEnumValEnd)); + clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", + "Intel SVML library"))); -const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" }; @@ -52,14 +53,33 @@ static bool hasSinCosPiStret(const Triple &T) { /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef<const char *> StandardNames) { + ArrayRef<StringRef> StandardNames) { // Verify that the StandardNames array is in alphabetical order. assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), - [](const char *LHS, const char *RHS) { - return strcmp(LHS, RHS) < 0; + [](StringRef LHS, StringRef RHS) { + return LHS < RHS; }) && "TargetLibraryInfoImpl function names must be sorted"); + bool ShouldExtI32Param = false, ShouldExtI32Return = false, + ShouldSignExtI32Param = false; + // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and + // returns corresponding to C-level ints and unsigned ints. + if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le || + T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) { + ShouldExtI32Param = true; + ShouldExtI32Return = true; + } + // Mips, on the other hand, needs signext on i32 parameters corresponding + // to both signed and unsigned ints. 
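Switching StandardNames from const char* to StringRef above lets the sortedness assertion, and the binary search that getLibFunc performs further down in this file's diff, use plain operator< instead of length-limited strncmp calls. A compact sketch of that lookup pattern over a made-up table:

#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <cassert>
using namespace llvm;

// A small, alphabetically sorted name table; the real array is generated from
// TargetLibraryInfo.def. These entries are only examples.
static const StringRef Names[] = {"cos", "exp", "log", "sin"};

// Binary search the table, returning the index of Name or -1 if absent.
static int lookupName(StringRef Name) {
  assert(std::is_sorted(std::begin(Names), std::end(Names)) &&
         "table must stay sorted for the binary search to be valid");
  const StringRef *I =
      std::lower_bound(std::begin(Names), std::end(Names), Name);
  return (I != std::end(Names) && *I == Name) ? int(I - std::begin(Names)) : -1;
}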
+ if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel || + T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) { + ShouldSignExtI32Param = true; + } + TLI.setShouldExtI32Param(ShouldExtI32Param); + TLI.setShouldExtI32Return(ShouldExtI32Return); + TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); + if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { TLI.setUnavailable(LibFunc::ldexp); @@ -322,6 +342,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // on Linux. // // Fall through to disable all of them. + LLVM_FALLTHROUGH; default: TLI.setUnavailable(LibFunc::exp10); TLI.setUnavailable(LibFunc::exp10f); @@ -429,14 +450,19 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { } TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) - : CustomNames(TLI.CustomNames) { + : CustomNames(TLI.CustomNames), ShouldExtI32Param(TLI.ShouldExtI32Param), + ShouldExtI32Return(TLI.ShouldExtI32Return), + ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) { memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; } TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) - : CustomNames(std::move(TLI.CustomNames)) { + : CustomNames(std::move(TLI.CustomNames)), + ShouldExtI32Param(TLI.ShouldExtI32Param), + ShouldExtI32Return(TLI.ShouldExtI32Return), + ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) { std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); VectorDescs = TLI.VectorDescs; @@ -445,12 +471,18 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { CustomNames = TLI.CustomNames; + ShouldExtI32Param = TLI.ShouldExtI32Param; + ShouldExtI32Return = TLI.ShouldExtI32Return; + ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); return *this; } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) { CustomNames = std::move(TLI.CustomNames); + ShouldExtI32Param = TLI.ShouldExtI32Param; + ShouldExtI32Return = TLI.ShouldExtI32Return; + ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); return *this; @@ -469,16 +501,16 @@ static StringRef sanitizeFunctionName(StringRef funcName) { bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc::Func &F) const { - const char *const *Start = &StandardNames[0]; - const char *const *End = &StandardNames[LibFunc::NumLibFuncs]; + StringRef const *Start = &StandardNames[0]; + StringRef const *End = &StandardNames[LibFunc::NumLibFuncs]; funcName = sanitizeFunctionName(funcName); if (funcName.empty()) return false; - const char *const *I = std::lower_bound( - Start, End, funcName, [](const char *LHS, StringRef RHS) { - return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; + StringRef const *I = std::lower_bound( + Start, End, funcName, [](StringRef LHS, StringRef RHS) { + return LHS < RHS; }); if (I != End && *I == funcName) { F = (LibFunc::Func)(I - Start); @@ -535,7 +567,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; - // fallthrough + LLVM_FALLTHROUGH; case LibFunc::strcpy: case LibFunc::stpcpy: return (NumParams == 2 && FTy.getReturnType() 
== FTy.getParamType(0) && @@ -547,7 +579,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; - // fallthrough + LLVM_FALLTHROUGH; case LibFunc::strncpy: case LibFunc::stpncpy: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && @@ -640,8 +672,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; - // fallthrough + LLVM_FALLTHROUGH; case LibFunc::memcpy: + case LibFunc::mempcpy: case LibFunc::memmove: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0)->isPointerTy() && @@ -652,7 +685,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; - // fallthrough + LLVM_FALLTHROUGH; case LibFunc::memset: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0)->isPointerTy() && @@ -843,10 +876,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc::stat64: case LibFunc::lstat64: case LibFunc::statvfs64: - return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() && + return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc::dunder_isoc99_sscanf: - return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() && + return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); case LibFunc::fopen64: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && @@ -953,15 +986,18 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc::ffs: case LibFunc::ffsl: case LibFunc::ffsll: + case LibFunc::fls: + case LibFunc::flsl: + case LibFunc::flsll: + return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && + FTy.getParamType(0)->isIntegerTy()); + case LibFunc::isdigit: case LibFunc::isascii: case LibFunc::toascii: return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && - FTy.getParamType(0)->isIntegerTy()); + FTy.getReturnType() == FTy.getParamType(0)); - case LibFunc::fls: - case LibFunc::flsl: - case LibFunc::flsll: case LibFunc::abs: case LibFunc::labs: case LibFunc::llabs: @@ -1004,21 +1040,19 @@ void TargetLibraryInfoImpl::disableAllFunctions() { } static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) { - return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName, - std::strlen(RHS.ScalarFnName)) < 0; + return LHS.ScalarFnName < RHS.ScalarFnName; } static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) { - return std::strncmp(LHS.VectorFnName, RHS.VectorFnName, - std::strlen(RHS.VectorFnName)) < 0; + return LHS.VectorFnName < RHS.VectorFnName; } static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) { - return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0; + return LHS.ScalarFnName < S; } static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) { - return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0; + return LHS.VectorFnName < S; } void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) { @@ -1074,6 +1108,75 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( addVectorizableFunctions(VecFuncs); break; } + case SVML: { + const VecDesc VecFuncs[] = { + {"sin", "__svml_sin2", 2}, + {"sin", "__svml_sin4", 4}, + 
{"sin", "__svml_sin8", 8}, + + {"sinf", "__svml_sinf4", 4}, + {"sinf", "__svml_sinf8", 8}, + {"sinf", "__svml_sinf16", 16}, + + {"cos", "__svml_cos2", 2}, + {"cos", "__svml_cos4", 4}, + {"cos", "__svml_cos8", 8}, + + {"cosf", "__svml_cosf4", 4}, + {"cosf", "__svml_cosf8", 8}, + {"cosf", "__svml_cosf16", 16}, + + {"pow", "__svml_pow2", 2}, + {"pow", "__svml_pow4", 4}, + {"pow", "__svml_pow8", 8}, + + {"powf", "__svml_powf4", 4}, + {"powf", "__svml_powf8", 8}, + {"powf", "__svml_powf16", 16}, + + {"llvm.pow.f64", "__svml_pow2", 2}, + {"llvm.pow.f64", "__svml_pow4", 4}, + {"llvm.pow.f64", "__svml_pow8", 8}, + + {"llvm.pow.f32", "__svml_powf4", 4}, + {"llvm.pow.f32", "__svml_powf8", 8}, + {"llvm.pow.f32", "__svml_powf16", 16}, + + {"exp", "__svml_exp2", 2}, + {"exp", "__svml_exp4", 4}, + {"exp", "__svml_exp8", 8}, + + {"expf", "__svml_expf4", 4}, + {"expf", "__svml_expf8", 8}, + {"expf", "__svml_expf16", 16}, + + {"llvm.exp.f64", "__svml_exp2", 2}, + {"llvm.exp.f64", "__svml_exp4", 4}, + {"llvm.exp.f64", "__svml_exp8", 8}, + + {"llvm.exp.f32", "__svml_expf4", 4}, + {"llvm.exp.f32", "__svml_expf8", 8}, + {"llvm.exp.f32", "__svml_expf16", 16}, + + {"log", "__svml_log2", 2}, + {"log", "__svml_log4", 4}, + {"log", "__svml_log8", 8}, + + {"logf", "__svml_logf4", 4}, + {"logf", "__svml_logf8", 8}, + {"logf", "__svml_logf16", 16}, + + {"llvm.log.f64", "__svml_log2", 2}, + {"llvm.log.f64", "__svml_log4", 4}, + {"llvm.log.f64", "__svml_log8", 8}, + + {"llvm.log.f32", "__svml_logf4", 4}, + {"llvm.log.f32", "__svml_logf8", 8}, + {"llvm.log.f32", "__svml_logf16", 16}, + }; + addVectorizableFunctions(VecFuncs); + break; + } case NoLibrary: break; } @@ -1162,7 +1265,7 @@ TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass( initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } -char TargetLibraryAnalysis::PassID; +AnalysisKey TargetLibraryAnalysis::Key; // Register the basic pass. 
INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo", diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 52013f7..5c0d1aa 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -150,6 +150,11 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, return Cost; } +bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I, + int64_t Offset) const { + return TTIImpl->isFoldableMemAccessOffset(I, Offset); +} + bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return TTIImpl->isTruncateFree(Ty1, Ty2); } @@ -173,6 +178,9 @@ unsigned TargetTransformInfo::getJumpBufSize() const { bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } +bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const { + return TTIImpl->shouldBuildLookupTablesForConstant(C); +} bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); @@ -186,11 +194,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const { return TTIImpl->isFPVectorizationPotentiallyUnsafe(); } -bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth, +bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const { - return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace, + return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } @@ -245,10 +254,6 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { - return TTIImpl->getLoadStoreVecRegBitWidth(AS); -} - unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } @@ -272,9 +277,10 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) const { int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -384,8 +390,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { } int TargetTransformInfo::getAddressComputationCost(Type *Tp, - bool IsComplex) const { - int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex); + ScalarEvolution *SE, + const SCEV *Ptr) const { + int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -417,6 +424,44 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller, return TTIImpl->areInlineCompatible(Caller, Callee); } +unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { + return TTIImpl->getLoadStoreVecRegBitWidth(AS); +} + +bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const { + return TTIImpl->isLegalToVectorizeLoad(LI); +} + 
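The TargetTransformInfo.cpp hunks above all share one shape: a thin public method forwards the query to the target-provided implementation object, sometimes asserting on the returned cost. A stripped-down sketch of that facade arrangement (all names here are invented for illustration):

#include <cassert>
#include <memory>

// Interface the targets implement (the "concept" in TTI terms).
struct CostModel {
  virtual ~CostModel() = default;
  virtual int loadCost(unsigned Bytes) const = 0;
};

struct DefaultCostModel : CostModel {
  int loadCost(unsigned Bytes) const override { return Bytes > 16 ? 2 : 1; }
};

// Thin facade mirroring TargetTransformInfo: owns an implementation and
// forwards each query, checking the invariant the wrappers assert on.
class CostInfo {
  std::unique_ptr<CostModel> Impl;

public:
  explicit CostInfo(std::unique_ptr<CostModel> M) : Impl(std::move(M)) {}

  int getLoadCost(unsigned Bytes) const {
    int Cost = Impl->loadCost(Bytes);
    assert(Cost >= 0 && "cost model should not produce negative costs");
    return Cost;
  }
};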
+bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { + return TTIImpl->isLegalToVectorizeStore(SI); +} + +bool TargetTransformInfo::isLegalToVectorizeLoadChain( + unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, + AddrSpace); +} + +bool TargetTransformInfo::isLegalToVectorizeStoreChain( + unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, + AddrSpace); +} + +unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, + unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); +} + +unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, + unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} @@ -426,11 +471,11 @@ TargetIRAnalysis::TargetIRAnalysis( : TTICallback(std::move(TTICallback)) {} TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F, - AnalysisManager<Function> &) { + FunctionAnalysisManager &) { return TTICallback(F); } -char TargetIRAnalysis::PassID; +AnalysisKey TargetIRAnalysis::Key; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); @@ -457,7 +502,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( } TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { - AnalysisManager<Function> DummyFAM; + FunctionAnalysisManager DummyFAM; TTI = TIRA.run(F, DummyFAM); return *TTI; } diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 20d162a..e920c4c 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -135,34 +135,35 @@ using namespace llvm; static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); namespace { -/// TBAANode - This is a simple wrapper around an MDNode which provides a -/// higher-level interface by hiding the details of how alias analysis -/// information is encoded in its operands. -class TBAANode { - const MDNode *Node; +/// This is a simple wrapper around an MDNode which provides a higher-level +/// interface by hiding the details of how alias analysis information is encoded +/// in its operands. +template<typename MDNodeTy> +class TBAANodeImpl { + MDNodeTy *Node; public: - TBAANode() : Node(nullptr) {} - explicit TBAANode(const MDNode *N) : Node(N) {} + TBAANodeImpl() : Node(nullptr) {} + explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. - const MDNode *getNode() const { return Node; } + MDNodeTy *getNode() const { return Node; } /// getParent - Get this TBAANode's Alias tree parent. - TBAANode getParent() const { + TBAANodeImpl<MDNodeTy> getParent() const { if (Node->getNumOperands() < 2) - return TBAANode(); - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + return TBAANodeImpl<MDNodeTy>(); + MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1)); if (!P) - return TBAANode(); + return TBAANodeImpl<MDNodeTy>(); // Ok, this node has a valid parent. Return it. 
- return TBAANode(P); + return TBAANodeImpl<MDNodeTy>(P); } - /// TypeIsImmutable - Test if this TBAANode represents a type for objects - /// which are not modified (by any means) in the context where this + /// Test if this TBAANode represents a type for objects which are + /// not modified (by any means) in the context where this /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { + bool isTypeImmutable() const { if (Node->getNumOperands() < 3) return false; ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); @@ -172,32 +173,40 @@ public: } }; +/// \name Specializations of \c TBAANodeImpl for const and non const qualified +/// \c MDNode. +/// @{ +typedef TBAANodeImpl<const MDNode> TBAANode; +typedef TBAANodeImpl<MDNode> MutableTBAANode; +/// @} + /// This is a simple wrapper around an MDNode which provides a /// higher-level interface by hiding the details of how alias analysis /// information is encoded in its operands. -class TBAAStructTagNode { +template<typename MDNodeTy> +class TBAAStructTagNodeImpl { /// This node should be created with createTBAAStructTagNode. - const MDNode *Node; + MDNodeTy *Node; public: - explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} + explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {} /// Get the MDNode for this TBAAStructTagNode. - const MDNode *getNode() const { return Node; } + MDNodeTy *getNode() const { return Node; } - const MDNode *getBaseType() const { + MDNodeTy *getBaseType() const { return dyn_cast_or_null<MDNode>(Node->getOperand(0)); } - const MDNode *getAccessType() const { + MDNodeTy *getAccessType() const { return dyn_cast_or_null<MDNode>(Node->getOperand(1)); } uint64_t getOffset() const { return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); } - /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for - /// objects which are not modified (by any means) in the context where this + /// Test if this TBAAStructTagNode represents a type for objects + /// which are not modified (by any means) in the context where this /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { + bool isTypeImmutable() const { if (Node->getNumOperands() < 4) return false; ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); @@ -207,6 +216,13 @@ public: } }; +/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const +/// qualified \c MDNods. +/// @{ +typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode; +typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode; +/// @} + /// This is a simple wrapper around an MDNode which provides a /// higher-level interface by hiding the details of how alias analysis /// information is encoded in its operands. @@ -311,8 +327,8 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || - (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) return true; return AAResultBase::pointsToConstantMemory(Loc, OrLocal); @@ -328,8 +344,8 @@ TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { // If this is an "immutable" type, we can assume the call doesn't write // to memory. 
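The TBAA wrapper classes above become templates over the node type solely so one set of accessors can serve both const MDNode readers and the mutable walk that getMostGenericTBAA needs. A tiny standalone sketch of that constness-templating trick with a stand-in node type:

// Stand-in for MDNode: a node with an optional parent link.
struct Node {
  Node *Parent = nullptr;
};

// One wrapper, instantiated as Wrapper<const Node> for read-only queries and
// Wrapper<Node> where mutable pointers must be handed back out.
template <typename NodeTy> class Wrapper {
  NodeTy *N;

public:
  explicit Wrapper(NodeTy *N = nullptr) : N(N) {}
  NodeTy *get() const { return N; }
  Wrapper<NodeTy> getParent() const {
    return Wrapper<NodeTy>(N ? N->Parent : nullptr);
  }
};

typedef Wrapper<const Node> ConstView; // mirrors TBAANode
typedef Wrapper<Node> MutableView;     // mirrors MutableTBAANode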
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) - if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || - (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) Min = FMRB_OnlyReadsMemory; return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); @@ -401,34 +417,31 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return A; // For struct-path aware TBAA, we use the access type of the tag. - bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); - if (StructPath) { - A = cast_or_null<MDNode>(A->getOperand(1)); - if (!A) - return nullptr; - B = cast_or_null<MDNode>(B->getOperand(1)); - if (!B) - return nullptr; - } + assert(isStructPathTBAA(A) && isStructPathTBAA(B) && + "Auto upgrade should have taken care of this!"); + A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType()); + if (!A) + return nullptr; + B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType()); + if (!B) + return nullptr; SmallSetVector<MDNode *, 4> PathA; - MDNode *T = A; - while (T) { - if (PathA.count(T)) + MutableTBAANode TA(A); + while (TA.getNode()) { + if (PathA.count(TA.getNode())) report_fatal_error("Cycle found in TBAA metadata."); - PathA.insert(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) - : nullptr; + PathA.insert(TA.getNode()); + TA = TA.getParent(); } SmallSetVector<MDNode *, 4> PathB; - T = B; - while (T) { - if (PathB.count(T)) + MutableTBAANode TB(B); + while (TB.getNode()) { + if (PathB.count(TB.getNode())) report_fatal_error("Cycle found in TBAA metadata."); - PathB.insert(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) - : nullptr; + PathB.insert(TB.getNode()); + TB = TB.getParent(); } int IA = PathA.size() - 1; @@ -443,11 +456,13 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { --IA; --IB; } - if (!StructPath) - return Ret; - if (!Ret) + // We either did not find a match, or the only common base "type" is + // the root node. In either case, we don't have any useful TBAA + // metadata to attach. + if (!Ret || Ret->getNumOperands() < 2) return nullptr; + // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); Metadata *Ops[3] = {Ret, Ret, @@ -478,52 +493,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { /// Aliases - Test whether the type represented by A may alias the /// type represented by B. bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { - // Make sure that both MDNodes are struct-path aware. - if (isStructPathTBAA(A) && isStructPathTBAA(B)) - return PathAliases(A, B); - - // Keep track of the root node for A and B. - TBAANode RootA, RootB; - - // Climb the tree from A to see if we reach B. - for (TBAANode T(A);;) { - if (T.getNode() == B) - // B is an ancestor of A. - return true; - - RootA = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Climb the tree from B to see if we reach A. - for (TBAANode T(B);;) { - if (T.getNode() == A) - // A is an ancestor of B. - return true; - - RootB = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. 
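getMostGenericTBAA above collects each tag's access path and then scans the two paths in lockstep from the roots, keeping the deepest type node they share. A small standalone sketch of that lowest-common-ancestor scan over parent chains (Node is a stand-in, not MDNode):

#include <vector>

struct Node {
  Node *Parent = nullptr;
};

// Deepest node shared by both parent chains, or nullptr when nothing but the
// (implicit) root is common -- the case where no useful merged tag exists.
static Node *commonAncestor(Node *A, Node *B) {
  std::vector<Node *> PathA, PathB;
  for (Node *N = A; N; N = N->Parent)
    PathA.push_back(N);
  for (Node *N = B; N; N = N->Parent)
    PathB.push_back(N);

  Node *Common = nullptr;
  int IA = int(PathA.size()) - 1, IB = int(PathB.size()) - 1;
  // Both paths end with their root; march toward the leaves while they agree.
  while (IA >= 0 && IB >= 0 && PathA[IA] == PathB[IB]) {
    Common = PathA[IA];
    --IA;
    --IB;
  }
  return Common;
}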
- if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -/// Test whether the struct-path tag represented by A may alias the -/// struct-path tag represented by B. -bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { - // Verify that both input nodes are struct-path aware. + // Verify that both input nodes are struct-path aware. Auto-upgrade should + // have taken care of this. assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); @@ -583,9 +554,9 @@ bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { return false; } -char TypeBasedAA::PassID; +AnalysisKey TypeBasedAA::Key; -TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> &AM) { +TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { return TypeBasedAAResult(); } diff --git a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp index 31e2b42..f567541 100644 --- a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -69,8 +69,7 @@ void llvm::findDevirtualizableCallsForTypeTest( // Find llvm.assume intrinsics for this llvm.type.test call. for (const Use &CIU : CI->uses()) { - auto AssumeCI = dyn_cast<CallInst>(CIU.getUser()); - if (AssumeCI) { + if (auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser())) { Function *F = AssumeCI->getCalledFunction(); if (F && F->getIntrinsicID() == Intrinsic::assume) Assumes.push_back(AssumeCI); diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index f2b4078..be62858 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -51,6 +51,12 @@ const unsigned MaxDepth = 6; static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); +// This optimization is known to cause performance regressions is some cases, +// keep it under a temporary flag for now. +static cl::opt<bool> +DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", + cl::Hidden, cl::init(true)); + /// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -80,7 +86,7 @@ struct Query { /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so /// on. 
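The new DontImproveNonNegativePhiBits option above is the usual way LLVM keeps a regression-prone optimization behind a temporary command-line switch: the flag defaults to the safe behaviour and can be flipped for experiments. A minimal sketch of defining and consulting such a flag (the flag and function below are made up):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden so it stays out of -help; the default keeps the old behaviour.
static cl::opt<bool> EnableRiskyFolding("enable-risky-folding", cl::Hidden,
                                        cl::init(false));

static int fold(int X) {
  if (EnableRiskyFolding)
    return X / 2; // experimental path, reachable only when the flag is set
  return X;       // conservative default
}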
- std::array<const Value*, MaxDepth> Excluded; + std::array<const Value *, MaxDepth> Excluded; unsigned NumExcluded; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -119,10 +125,10 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { return nullptr; } -static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, +static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth, const Query &Q); -void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, +void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -130,7 +136,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, +bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { assert(LHS->getType() == RHS->getType() && @@ -145,10 +152,10 @@ bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); } -static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, +static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, unsigned Depth, const Query &Q); -void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, +void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -156,10 +163,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, +static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q); -bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero, +bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, + bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -167,15 +175,16 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q); +static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); -bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, +bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, +bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { bool NonNegative, Negative; @@ -183,7 +192,7 @@ bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, return NonNegative; } -bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth, +bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { if 
(auto *CI = dyn_cast<ConstantInt>(V)) @@ -195,7 +204,7 @@ bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(V, DL, Depth, AC, CxtI, DT); } -bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth, +bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { bool NonNegative, Negative; @@ -203,41 +212,45 @@ bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth, return Negative; } -static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q); +static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q); -bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { +bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, + const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT)); } -static bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth, +static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q); -bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, +bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::MaskedValueIsZero(V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q); +static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, + const Query &Q); -unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL, +unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, +static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, + bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, unsigned Depth, const Query &Q) { if (!Add) { - if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { + if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { // We know that the top bits of C-X are clear if X contains less bits // than C (i.e. no wrap-around can happen). For example, 20-X is // positive if we can prove that X is >= 0 and < 16. 
@@ -311,7 +324,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } } -static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, +static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, unsigned Depth, const Query &Q) { @@ -398,7 +411,7 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, } } -static bool isEphemeralValueOf(Instruction *I, const Value *E) { +static bool isEphemeralValueOf(const Instruction *I, const Value *E) { SmallVector<const Value *, 16> WorkSet(1, I); SmallPtrSet<const Value *, 32> Visited; SmallPtrSet<const Value *, 16> EphValues; @@ -406,7 +419,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) { // The instruction defining an assumption's condition itself is always // considered ephemeral to that assumption (even if it has other // non-ephemeral users). See r246696's test case for an example. - if (std::find(I->op_begin(), I->op_end(), E) != I->op_end()) + if (is_contained(I->operands(), E)) return true; while (!WorkSet.empty()) { @@ -415,8 +428,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) { continue; // If all uses of this value are ephemeral, then so is this value. - if (std::all_of(V->user_begin(), V->user_end(), - [&](const User *U) { return EphValues.count(U); })) { + if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) { if (V == E) return true; @@ -456,9 +468,9 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { return false; } -static bool isValidAssumeForContext(Value *V, const Instruction *CxtI, - const DominatorTree *DT) { - Instruction *Inv = cast<Instruction>(V); +bool llvm::isValidAssumeForContext(const Instruction *Inv, + const Instruction *CxtI, + const DominatorTree *DT) { // There are two restrictions on the use of an assume: // 1. The assume must dominate the context (or the control flow must @@ -469,54 +481,42 @@ static bool isValidAssumeForContext(Value *V, const Instruction *CxtI, // the assume). if (DT) { - if (DT->dominates(Inv, CxtI)) { + if (DT->dominates(Inv, CxtI)) return true; - } else if (Inv->getParent() == CxtI->getParent()) { - // The context comes first, but they're both in the same block. Make sure - // there is nothing in between that might interrupt the control flow. - for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(CxtI)), - IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) - return false; - - return !isEphemeralValueOf(Inv, CxtI); - } + } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { + // We don't have a DT, but this trivially dominates. + return true; + } + // With or without a DT, the only remaining case we will check is if the + // instructions are in the same BB. Give up if that is not the case. + if (Inv->getParent() != CxtI->getParent()) return false; - } - // When we don't have a DT, we do a limited search... - if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { - return true; - } else if (Inv->getParent() == CxtI->getParent()) { + // If we have a dom tree, then we now know that the assume doens't dominate + // the other instruction. If we don't have a dom tree then we can check if + // the assume is first in the BB. + if (!DT) { // Search forward from the assume until we reach the context (or the end // of the block); the common case is that the assume will come first. 
- for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), + for (auto I = std::next(BasicBlock::const_iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) if (&*I == CxtI) return true; - - // The context must come first... - for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(CxtI)), - IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) - return false; - - return !isEphemeralValueOf(Inv, CxtI); } - return false; -} + // The context comes first, but they're both in the same block. Make sure + // there is nothing in between that might interrupt the control flow. + for (BasicBlock::const_iterator I = + std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); + I != IE; ++I) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) + return false; -bool llvm::isValidAssumeForContext(const Instruction *I, - const Instruction *CxtI, - const DominatorTree *DT) { - return ::isValidAssumeForContext(const_cast<Instruction *>(I), CxtI, DT); + return !isEphemeralValueOf(Inv, CxtI); } -static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, +static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we @@ -526,7 +526,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, unsigned BitWidth = KnownZero.getBitWidth(); - for (auto &AssumeVH : Q.AC->assumptions()) { + // Note that the patterns below need to be kept in sync with the code + // in AssumptionCache::updateAffectedValues. + + for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { if (!AssumeVH) continue; CallInst *I = cast<CallInst>(AssumeVH); @@ -778,6 +781,23 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); } } + + // If assumptions conflict with each other or previous known bits, then we + // have a logical fallacy. This should only happen when a program has + // undefined behavior. We can't assert/crash, so clear out the known bits and + // hope for the best. + + // FIXME: Publish a warning/remark that we have encountered UB or the compiler + // is broken. + + // FIXME: Implement a stronger version of "I give up" by invalidating/clearing + // the assumption cache. This should indicate that the cache is corrupted so + // future callers will not waste time repopulating it with faulty assumptions. + + if ((KnownZero & KnownOne) != 0) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + } } // Compute known bits from a shift operator, including those with a @@ -788,11 +808,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // shift amount, compute the implied known-zero or known-one bits of the shift // operator's result respectively for that shift amount. The results from calling // KZF and KOF are conservatively combined for all permitted shift amounts. 
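Aside: the KZF/KOF contract described in the comment above can be exercised in isolation. The following is a minimal, self-contained sketch assuming only LLVM's APInt and function_ref headers; the helper names shlKnownZero/shlKnownOne are invented for the example and simply mirror the plain-shl functors that appear further down in the Shl case.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h" // llvm::function_ref
using llvm::APInt;

// Known-zero transfer for a plain shl: old zeros move up and the shifted-in
// low bits are always zero.
static APInt shlKnownZero(const APInt &KnownZero, unsigned ShiftAmt) {
  return (KnownZero << ShiftAmt) |
         APInt::getLowBitsSet(KnownZero.getBitWidth(), ShiftAmt);
}

// Known-one transfer for a plain shl: known ones simply move up.
static APInt shlKnownOne(const APInt &KnownOne, unsigned ShiftAmt) {
  return KnownOne << ShiftAmt;
}

int main() {
  // Operand: low nibble known zero, bits 4 and 5 known one.
  APInt KZ(8, 0x0F), KO(8, 0x30);
  llvm::function_ref<APInt(const APInt &, unsigned)> KZF = shlKnownZero;
  llvm::function_ref<APInt(const APInt &, unsigned)> KOF = shlKnownOne;
  APInt NewKZ = KZF(KZ, 2); // 0x3F: old zeros shifted up plus two new low zeros
  APInt NewKO = KOF(KO, 2); // 0xC0
  return (NewKZ == 0x3F && NewKO == 0xC0) ? 0 : 1;
}

Taking the functors as function_ref, as the new signature in the next hunk does, keeps one out-of-line copy of the routine while still letting each shift opcode supply its own bit-transfer functions.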
-template <typename KZFunctor, typename KOFunctor> -static void computeKnownBitsFromShiftOperator(Operator *I, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - unsigned Depth, const Query &Q, KZFunctor KZF, KOFunctor KOF) { +static void computeKnownBitsFromShiftOperator( + const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, + APInt &KnownOne2, unsigned Depth, const Query &Q, + function_ref<APInt(const APInt &, unsigned)> KZF, + function_ref<APInt(const APInt &, unsigned)> KOF) { unsigned BitWidth = KnownZero.getBitWidth(); if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -801,6 +821,14 @@ static void computeKnownBitsFromShiftOperator(Operator *I, computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); KnownZero = KZF(KnownZero, ShiftAmt); KnownOne = KOF(KnownOne, ShiftAmt); + // If there is conflict between KnownZero and KnownOne, this must be an + // overflowing left shift, so the shift result is undefined. Clear KnownZero + // and KnownOne bits so that other code could propagate this undef. + if ((KnownZero & KnownOne) != 0) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + } + return; } @@ -866,7 +894,7 @@ static void computeKnownBitsFromShiftOperator(Operator *I, } } -static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, +static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt &KnownOne, unsigned Depth, const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); @@ -950,14 +978,64 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); break; } - case Instruction::Select: + case Instruction::Select: { computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + const Value *LHS; + const Value *RHS; + SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; + if (SelectPatternResult::isMinOrMax(SPF)) { + computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q); + } else { + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + } + + unsigned MaxHighOnes = 0; + unsigned MaxHighZeros = 0; + if (SPF == SPF_SMAX) { + // If both sides are negative, the result is negative. + if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1]) + // We can derive a lower bound on the result by taking the max of the + // leading one bits. + MaxHighOnes = + std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + // If either side is non-negative, the result is non-negative. + else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1]) + MaxHighZeros = 1; + } else if (SPF == SPF_SMIN) { + // If both sides are non-negative, the result is non-negative. + if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1]) + // We can derive an upper bound on the result by taking the max of the + // leading zero bits. + MaxHighZeros = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + // If either side is negative, the result is negative. + else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1]) + MaxHighOnes = 1; + } else if (SPF == SPF_UMAX) { + // We can derive a lower bound on the result by taking the max of the + // leading one bits. 
+ MaxHighOnes = + std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + } else if (SPF == SPF_UMIN) { + // We can derive an upper bound on the result by taking the max of the + // leading zero bits. + MaxHighZeros = + std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); + } + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; + if (MaxHighOnes > 0) + KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes); + if (MaxHighZeros > 0) + KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros); break; + } case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: @@ -967,8 +1045,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; // Can't work with floating point. case Instruction::PtrToInt: case Instruction::IntToPtr: - case Instruction::AddrSpaceCast: // Pointers could be different sizes. - // FALL THROUGH and handle them the same as zext/trunc. + // Fall through and handle them the same as zext/trunc. + LLVM_FALLTHROUGH; case Instruction::ZExt: case Instruction::Trunc: { Type *SrcTy = I->getOperand(0)->getType(); @@ -1020,13 +1098,23 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { - return (KnownZero << ShiftAmt) | - APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) { + APInt KZResult = + (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + // If this shift has "nsw" keyword, then the result is either a poison + // value or has the same sign bit as the first operand. + if (NSW && KnownZero.isNegative()) + KZResult.setBit(BitWidth - 1); + return KZResult; }; - auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { - return KnownOne << ShiftAmt; + auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) { + APInt KOResult = KnownOne << ShiftAmt; + if (NSW && KnownOne.isNegative()) + KOResult.setBit(BitWidth - 1); + return KOResult; }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, @@ -1143,7 +1231,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::Alloca: { - AllocaInst *AI = cast<AllocaInst>(I); + const AllocaInst *AI = cast<AllocaInst>(I); unsigned Align = AI->getAlignment(); if (Align == 0) Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); @@ -1163,7 +1251,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { Value *Index = I->getOperand(i); - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { // Handle struct member offset arithmetic. // Handle case when index is vector zeroinitializer @@ -1200,7 +1288,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Instruction::PHI: { - PHINode *P = cast<PHINode>(I); + const PHINode *P = cast<PHINode>(I); // Handle the case of a simple two-predecessor recurrence PHI. // There's a lot more that could theoretically be done here, but // this is sufficient to catch some interesting cases. 
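A small worked example of the MaxHighOnes bound computed above for SPF_SMAX, written as a standalone sketch that assumes only LLVM's APInt; the masks 0xE0 and 0xC0 are arbitrary sample values.

#include "llvm/ADT/APInt.h"
#include <algorithm>
using llvm::APInt;

int main() {
  const unsigned BW = 8;
  // Suppose both smax operands are known negative, with known-one masks
  // 11100000 and 11000000 respectively.
  APInt KnownOneL(BW, 0xE0), KnownOneR(BW, 0xC0);
  unsigned MaxHighOnes = 0;
  if (KnownOneL[BW - 1] && KnownOneR[BW - 1])
    MaxHighOnes = std::max(KnownOneL.countLeadingOnes(),
                           KnownOneR.countLeadingOnes()); // == 3
  // smax of two negatives is the one closer to zero, so the result keeps at
  // least the longer run of leading ones: it must still look like 111xxxxx.
  APInt ResultKnownOne = APInt::getHighBitsSet(BW, MaxHighOnes);
  return ResultKnownOne == 0xE0 ? 0 : 1;
}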
@@ -1237,9 +1325,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt KnownZero3(KnownZero), KnownOne3(KnownOne); computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); - KnownZero = APInt::getLowBitsSet(BitWidth, - std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + KnownZero = APInt::getLowBitsSet( + BitWidth, std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + + if (DontImproveNonNegativePhiBits) + break; + + auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU); + if (OverflowOp && OverflowOp->hasNoSignedWrap()) { + // If initial value of recurrence is nonnegative, and we are adding + // a nonnegative number with nsw, the result can only be nonnegative + // or poison value regardless of the number of times we execute the + // add in phi recurrence. If initial value is negative and we are + // adding a negative number with nsw, the result can only be + // negative or poison value. Similar arguments apply to sub and mul. + // + // (add non-negative, non-negative) --> non-negative + // (add negative, negative) --> negative + if (Opcode == Instruction::Add) { + if (KnownZero2.isNegative() && KnownZero3.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (KnownOne2.isNegative() && KnownOne3.isNegative()) + KnownOne.setBit(BitWidth - 1); + } + + // (sub nsw non-negative, negative) --> non-negative + // (sub nsw negative, non-negative) --> negative + else if (Opcode == Instruction::Sub && LL == I) { + if (KnownZero2.isNegative() && KnownOne3.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (KnownOne2.isNegative() && KnownZero3.isNegative()) + KnownOne.setBit(BitWidth - 1); + } + + // (mul nsw non-negative, non-negative) --> non-negative + else if (Opcode == Instruction::Mul && KnownZero2.isNegative() && + KnownZero3.isNegative()) + KnownZero.setBit(BitWidth - 1); + } + break; } } @@ -1284,12 +1409,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, // function. if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); - if (Value *RV = CallSite(I).getReturnedArgOperand()) { + if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q); KnownZero |= KnownZero2; KnownOne |= KnownOne2; } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bswap: @@ -1326,9 +1451,16 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } } break; + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and skip + // tracking the specific element. But at least we might find information + // valid for all elements of the vector (for example if vector is sign + // extended, shifted, etc). 
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + break; case Instruction::ExtractValue: if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { - ExtractValueInst *EVI = cast<ExtractValueInst>(I); + const ExtractValueInst *EVI = cast<ExtractValueInst>(I); if (EVI->getNumIndices() != 1) break; if (EVI->getIndices()[0] == 0) { switch (II->getIntrinsicID()) { @@ -1372,7 +1504,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, +void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth, const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); @@ -1388,9 +1520,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne.getBitWidth() == BitWidth && "V, KnownOne and KnownZero should have same BitWidth"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue(); + const APInt *C; + if (match(V, m_APInt(C))) { + // We know all of the bits for a scalar constant or a splat vector constant! + KnownOne = *C; KnownZero = ~KnownOne; return; } @@ -1402,7 +1535,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } // Handle a constant vector by taking the intersection of the known bits of // each element. - if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { // We know that CDS must be a vector of integers. Take the intersection of // each element. KnownZero.setAllBits(); KnownOne.setAllBits(); @@ -1415,7 +1548,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } - if (auto *CV = dyn_cast<ConstantVector>(V)) { + if (const auto *CV = dyn_cast<ConstantVector>(V)) { // We know that CV must be a vector of integers. Take the intersection of // each element. KnownZero.setAllBits(); KnownOne.setAllBits(); @@ -1438,6 +1571,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); + // We can't imply anything about undefs. + if (isa<UndefValue>(V)) + return; + + // There's no point in looking through other users of ConstantData for + // assumptions. Confirm that we've handled them all. + assert(!isa<ConstantData>(V) && "Unhandled constant data!"); + // Limit search depth. // All recursive calls that increase depth must come after this. if (Depth == MaxDepth) @@ -1445,13 +1586,13 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has // the bits of its aliasee. 
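For readers new to this representation, here is a tiny sketch of the KnownZero/KnownOne masks for the constant case handled above, assuming LLVM's APInt; the value 37 is arbitrary.

#include "llvm/ADT/APInt.h"
using llvm::APInt;

int main() {
  APInt C(8, 37);        // i8 37 == 0b00100101
  APInt KnownOne = C;    // bits that are certainly 1
  APInt KnownZero = ~C;  // bits that are certainly 0
  // For a well-defined value the two masks never overlap; an overlap is the
  // "logical fallacy" case that the assume handling earlier clears out.
  return (KnownZero & KnownOne) == 0 ? 0 : 1;
}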
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->isInterposable()) computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q); return; } - if (Operator *I = dyn_cast<Operator>(V)) + if (const Operator *I = dyn_cast<Operator>(V)) computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q); // Aligned pointers have trailing zeros - refine KnownZero set @@ -1472,7 +1613,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, /// Determine whether the sign bit is known to be zero or one. /// Convenience wrapper around computeKnownBits. -void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, +void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, unsigned Depth, const Query &Q) { unsigned BitWidth = getBitWidth(V->getType(), Q.DL); if (!BitWidth) { @@ -1491,9 +1632,9 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, +bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q) { - if (Constant *C = dyn_cast<Constant>(V)) { + if (const Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; @@ -1523,10 +1664,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, match(V, m_LShr(m_Value(X), m_Value())))) return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); - if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) + if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V)) return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); - if (SelectInst *SI = dyn_cast<SelectInst>(V)) + if (const SelectInst *SI = dyn_cast<SelectInst>(V)) return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); @@ -1544,7 +1685,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, // Adding a power-of-two or zero to the same power-of-two or zero yields // either the original power-of-two, a larger power-of-two or zero. if (match(V, m_Add(m_Value(X), m_Value(Y)))) { - OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); + const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) @@ -1590,7 +1731,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, /// to be non-null. /// /// Currently this routine does not support vector GEPs. -static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth, +static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, const Query &Q) { if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) return false; @@ -1609,7 +1750,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth, for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE; ++GTI) { // Struct types are easy -- they must always be indexed by a constant. 
- if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); unsigned ElementIdx = OpC->getZExtValue(); const StructLayout *SL = Q.DL.getStructLayout(STy); @@ -1649,7 +1790,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth, /// Does the 'Range' metadata (which must be a valid MD_range operand list) /// ensure that the value it's attached to is never Value? 'RangeType' is /// is the type of the value described by the range. -static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) { +static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { const unsigned NumRanges = Ranges->getNumOperands() / 2; assert(NumRanges >= 1); for (unsigned i = 0; i < NumRanges; ++i) { @@ -1668,7 +1809,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) { /// For vectors return true if every element is known to be non-zero when /// defined. Supports values with integer or pointer type and vectors of /// integers. -bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { +bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (auto *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return false; @@ -1712,7 +1853,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { if (V->getType()->isPointerTy()) { if (isKnownNonNull(V)) return true; - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, Depth, Q)) return true; } @@ -1732,7 +1873,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. - OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); + const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, Depth, Q); @@ -1746,7 +1887,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { // shr exact can only shift out zero bits. - PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); + const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); if (BO->isExact()) return isKnownNonZero(X, Depth, Q); @@ -1817,7 +1958,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { } // X * Y. else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { - OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); + const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && @@ -1825,13 +1966,13 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. - else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { if (isKnownNonZero(SI->getTrueValue(), Depth, Q) && isKnownNonZero(SI->getFalseValue(), Depth, Q)) return true; } // PHI - else if (PHINode *PN = dyn_cast<PHINode>(V)) { + else if (const PHINode *PN = dyn_cast<PHINode>(V)) { // Try and detect a recurrence that monotonically increases from a // starting value, as these are common as induction variables. 
if (PN->getNumIncomingValues() == 2) { @@ -1865,8 +2006,8 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { } /// Return true if V2 == V1 + X, where X is known non-zero. -static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) { - BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); +static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { + const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); if (!BO || BO->getOpcode() != Instruction::Add) return false; Value *Op = nullptr; @@ -1880,7 +2021,7 @@ static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) { } /// Return true if it is known that V1 != V2. -static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) { +static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { if (V1->getType()->isVectorTy() || V1 == V2) return false; if (V1->getType() != V2->getType()) @@ -1916,7 +2057,7 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) { /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth, +bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); computeKnownBits(V, KnownZero, KnownOne, Depth, Q); @@ -1927,8 +2068,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth, /// minimum number of sign bits. Return 0 if the value is not a vector constant /// or if any element was not analyzed; otherwise, return the count for the /// element with the minimum number of sign bits. -static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) { - auto *CV = dyn_cast<Constant>(V); +static unsigned computeNumSignBitsVectorConstant(const Value *V, + unsigned TyBits) { + const auto *CV = dyn_cast<Constant>(V); if (!CV || !CV->getType()->isVectorTy()) return 0; @@ -1956,7 +2098,7 @@ static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) { /// after an "ashr X, 2", we know that the top 3 bits are all equal to each /// other, so we return 3. For vectors, return the number of sign bits for the /// vector element with the mininum number of known sign bits. -unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) { +unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) { unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -1964,10 +2106,10 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) { // Note that ConstantInt is handled by the general computeKnownBits case // below. - if (Depth == 6) + if (Depth == MaxDepth) return 1; // Limit search depth. - Operator *U = dyn_cast<Operator>(V); + const Operator *U = dyn_cast<Operator>(V); switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: @@ -2125,7 +2267,7 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) { return std::min(Tmp, Tmp2)-1; case Instruction::PHI: { - PHINode *PN = cast<PHINode>(U); + const PHINode *PN = cast<PHINode>(U); unsigned NumIncomingValues = PN->getNumIncomingValues(); // Don't analyze large in-degree PHIs. 
if (NumIncomingValues > 4) break; @@ -2147,6 +2289,13 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) { // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; + + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and skip + // tracking the specific element. But at least we might find information + // valid for all elements of the vector (for example if vector is sign + // extended, shifted, etc). + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2413,10 +2562,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. - if (Depth == 6) + if (Depth == MaxDepth) return false; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); @@ -2454,54 +2600,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, return false; } -bool llvm::CannotBeOrderedLessThanZero(const Value *V, - const TargetLibraryInfo *TLI, - unsigned Depth) { - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) - return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); +/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a +/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign +/// bit despite comparing equal. +static bool cannotBeOrderedLessThanZeroImpl(const Value *V, + const TargetLibraryInfo *TLI, + bool SignBitOnly, + unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + return !CFP->getValueAPF().isNegative() || + (!SignBitOnly && CFP->getValueAPF().isZero()); + } - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeNegativeZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. - if (Depth == 6) - return false; // Limit search depth. + if (Depth == MaxDepth) + return false; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); - if (!I) return false; + if (!I) + return false; switch (I->getOpcode()) { - default: break; + default: + break; // Unsigned integers are always nonnegative. case Instruction::UIToFP: return true; case Instruction::FMul: // x*x is always non-negative or a NaN. 
- if (I->getOperand(0) == I->getOperand(1)) + if (I->getOperand(0) == I->getOperand(1) && + (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) return true; - // Fall through + + LLVM_FALLTHROUGH; case Instruction::FAdd: case Instruction::FDiv: case Instruction::FRem: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Instruction::Select: - return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Instruction::Call: Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI); switch (IID) { default: break; case Intrinsic::maxnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) || - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) || + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::minnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: @@ -2513,18 +2675,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) return true; } - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. return I->getOperand(0) == I->getOperand(1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); } break; } return false; } +bool llvm::CannotBeOrderedLessThanZero(const Value *V, + const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0); +} + +bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. 
This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, @@ -2768,11 +2942,17 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, break; if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - APInt GEPOffset(BitWidth, 0); + // If one of the values we have visited is an addrspacecast, then + // the pointer type of this GEP may be different from the type + // of the Ptr parameter which was passed to this function. This + // means when we construct GEPOffset, we need to use the size + // of GEP's pointer type rather than the size of the original + // pointer type. + APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0); if (!GEP->accumulateConstantOffset(DL, GEPOffset)) break; - ByteOffset += GEPOffset; + ByteOffset += GEPOffset.getSExtValue(); Ptr = GEP->getPointerOperand(); } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || @@ -2886,13 +3066,14 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) { +static uint64_t GetStringLengthH(const Value *V, + SmallPtrSetImpl<const PHINode*> &PHIs) { // Look through noop bitcast instructions. V = V->stripPointerCasts(); // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. - if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (const PHINode *PN = dyn_cast<PHINode>(V)) { if (!PHIs.insert(PN).second) return ~0ULL; // already in the set. @@ -2914,7 +3095,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) { } // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); if (Len1 == 0) return 0; uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); @@ -2935,10 +3116,10 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) { /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. -uint64_t llvm::GetStringLength(Value *V) { +uint64_t llvm::GetStringLength(const Value *V) { if (!V->getType()->isPointerTy()) return 0; - SmallPtrSet<PHINode*, 32> PHIs; + SmallPtrSet<const PHINode*, 32> PHIs; uint64_t Len = GetStringLengthH(V, PHIs); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return // an empty string as a length. @@ -2947,7 +3128,8 @@ uint64_t llvm::GetStringLength(Value *V) { /// \brief \p PN defines a loop-variant pointer to an object. Check if the /// previous iteration of the loop was referring to the same object as \p PN. -static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) { +static bool isSameUnderlyingObjectInLoop(const PHINode *PN, + const LoopInfo *LI) { // Find the loop-defined value. 
Loop *L = LI->getLoopFor(PN->getParent()); if (PN->getNumIncomingValues() != 2) @@ -3126,6 +3308,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Intrinsic::dbg_value: return true; + case Intrinsic::bitreverse: case Intrinsic::bswap: case Intrinsic::ctlz: case Intrinsic::ctpop: @@ -3208,11 +3391,11 @@ bool llvm::isKnownNonNull(const Value *V) { if (const Argument *A = dyn_cast<Argument>(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // A global variable in address space 0 is non null unless extern weak. - // Other address spaces may have null as a valid address for a global, - // so we can't assume anything. + // A global variable in address space 0 is non null unless extern weak + // or an absolute symbol reference. Other address spaces may have null as a + // valid address for a global, so we can't assume anything. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->hasExternalWeakLinkage() && + return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && GV->getType()->getAddressSpace() == 0; // A Load tagged with nonnull metadata is never null. @@ -3230,6 +3413,9 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "V must be pointer type"); + assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull"); + assert(CtxI && "Context instruction required for analysis"); + assert(DT && "Dominator tree required for analysis"); unsigned NumUsesExplored = 0; for (auto *U : V->users()) { @@ -3266,13 +3452,20 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { + if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V)) + return false; + if (isKnownNonNull(V)) return true; - return CtxI ? 
::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false; + if (!CtxI || !DT) + return false; + + return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT); } -OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, +OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, + const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -3322,7 +3515,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } -OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, +OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, + const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -3351,9 +3545,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } -static OverflowResult computeOverflowForSignedAdd( - Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, - AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { +static OverflowResult computeOverflowForSignedAdd(const Value *LHS, + const Value *RHS, + const AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -3395,7 +3593,8 @@ static OverflowResult computeOverflowForSignedAdd( return OverflowResult::MayOverflow; } -bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { +bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, + const DominatorTree &DT) { #ifndef NDEBUG auto IID = II->getIntrinsicID(); assert((IID == Intrinsic::sadd_with_overflow || @@ -3407,11 +3606,11 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { "Not an overflow intrinsic!"); #endif - SmallVector<BranchInst *, 2> GuardingBranches; - SmallVector<ExtractValueInst *, 2> Results; + SmallVector<const BranchInst *, 2> GuardingBranches; + SmallVector<const ExtractValueInst *, 2> Results; - for (User *U : II->users()) { - if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + for (const User *U : II->users()) { + if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) { assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); if (EVI->getIndices()[0] == 0) @@ -3419,8 +3618,8 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { else { assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); - for (auto *U : EVI->users()) - if (auto *B = dyn_cast<BranchInst>(U)) { + for (const auto *U : EVI->users()) + if (const auto *B = dyn_cast<BranchInst>(U)) { assert(B->isConditional() && "How else is it using an i1?"); GuardingBranches.push_back(B); } @@ -3432,13 +3631,13 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { } } - auto AllUsesGuardedByBranch = [&](BranchInst *BI) { + auto AllUsesGuardedByBranch = [&](const BranchInst *BI) { BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); if (!NoWrapEdge.isSingleEdge()) return false; // Check if all users of the add are provably no-wrap. - for (auto *Result : Results) { + for (const auto *Result : Results) { // If the extractvalue itself is not executed on overflow, the we don't // need to check each use separately, since domination is transitive. 
if (DT.dominates(NoWrapEdge, Result->getParent())) @@ -3456,7 +3655,7 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { } -OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, +OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -3465,7 +3664,8 @@ OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, Add, DL, AC, CxtI, DT); } -OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, +OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, + const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -3502,12 +3702,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { return false; // Calls can throw, or contain an infinite loop, or kill the process. - if (CallSite CS = CallSite(const_cast<Instruction*>(I))) { - // Calls which don't write to arbitrary memory are safe. - // FIXME: Ignoring infinite loops without any side-effects is too aggressive, - // but it's consistent with other passes. See http://llvm.org/PR965 . - // FIXME: This isn't aggressive enough; a call which only writes to a - // global is guaranteed to return. + if (auto CS = ImmutableCallSite(I)) { + // Call sites that throw have implicit non-local control flow. + if (!CS.doesNotThrow()) + return false; + + // Non-throwing call sites can loop infinitely, call exit/pthread_exit + // etc. and thus not return. However, LLVM already assumes that + // + // - Thread exiting actions are modeled as writes to memory invisible to + // the program. + // + // - Loops that don't have side effects (side effects are volatile/atomic + // stores and IO) always terminate (see http://llvm.org/PR965). + // Furthermore IO itself is also modeled as writes to memory invisible to + // the program. + // + // We rely on those assumptions here, and use the memory effects of the call + // target as a proxy for checking that it always returns. + + // FIXME: This isn't aggressive enough; a call which only writes to a global + // is guaranteed to return. return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || match(I, m_Intrinsic<Intrinsic::assume>()); } @@ -3688,7 +3903,7 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { return false; } -static bool isKnownNonNaN(Value *V, FastMathFlags FMF) { +static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { if (FMF.noNaNs()) return true; @@ -3697,12 +3912,90 @@ static bool isKnownNonNaN(Value *V, FastMathFlags FMF) { return false; } -static bool isKnownNonZero(Value *V) { +static bool isKnownNonZero(const Value *V) { if (auto *C = dyn_cast<ConstantFP>(V)) return !C->isZero(); return false; } +/// Match non-obvious integer minimum and maximum sequences. +static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) + return {SPF_UNKNOWN, SPNB_NA, false}; + + // Z = X -nsw Y + // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0) + // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0) + if (match(TrueVal, m_Zero()) && + match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; + } + + // Z = X -nsw Y + // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) + // (X <s Y) ? 
Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0) + if (match(FalseVal, m_Zero()) && + match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + } + + const APInt *C1; + if (!match(CmpRHS, m_APInt(C1))) + return {SPF_UNKNOWN, SPNB_NA, false}; + + // An unsigned min/max can be written with a signed compare. + const APInt *C2; + if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || + (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { + // Is the sign bit set? + // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX + // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN + if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) { + LHS = TrueVal; + RHS = FalseVal; + return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; + } + + // Is the sign bit clear? + // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX + // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN + if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() && + C2->isMinSignedValue()) { + LHS = TrueVal; + RHS = FalseVal; + return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; + } + } + + // Look through 'not' ops to find disguised signed min/max. + // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C) + // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) + if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && + match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; + } + + // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X) + // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X) + if (match(FalseVal, m_Not(m_Specific(CmpLHS))) && + match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) { + LHS = TrueVal; + RHS = FalseVal; + return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + } + + return {SPF_UNKNOWN, SPNB_NA, false}; +} + static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, FastMathFlags FMF, Value *CmpLHS, Value *CmpRHS, @@ -3801,39 +4094,26 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, } } - if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) { + const APInt *C1; + if (match(CmpRHS, m_APInt(C1))) { if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) || (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) { // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X - if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { + if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) { return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X - if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) { + if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) { return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } } - - // Y >s C ? ~Y : ~C == ~Y <s ~C ? 
~Y : ~C = SMIN(~Y, ~C) - if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) { - if (Pred == ICmpInst::ICMP_SGT && C1->getType() == C2->getType() && - ~C1->getValue() == C2->getValue() && - (match(TrueVal, m_Not(m_Specific(CmpLHS))) || - match(CmpLHS, m_Not(m_Specific(TrueVal))))) { - LHS = TrueVal; - RHS = FalseVal; - return {SPF_SMIN, SPNB_NA, false}; - } - } } - // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - - return {SPF_UNKNOWN, SPNB_NA, false}; + return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, @@ -3932,30 +4212,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, LHS, RHS); } -ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) { - const unsigned NumRanges = Ranges.getNumOperands() / 2; - assert(NumRanges >= 1 && "Must have at least one range!"); - assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs"); - - auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0)); - auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1)); - - ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue()); - - for (unsigned i = 1; i < NumRanges; ++i) { - auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); - auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); - - // Note: unionWith will potentially create a range that contains values not - // contained in any of the original N ranges. - CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue())); - } - - return CR; -} - /// Return true if "icmp Pred LHS RHS" is always true. -static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, +static bool isTruePredicate(CmpInst::Predicate Pred, + const Value *LHS, const Value *RHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -3984,7 +4243,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, return true; // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) - auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X, + auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, + const Value *&X, const APInt *&CA, const APInt *&CB) { if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) @@ -4004,7 +4264,7 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, return false; }; - Value *X; + const Value *X; const APInt *CLHS, *CRHS; if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) return CLHS->ule(*CRHS); @@ -4017,8 +4277,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred /// ALHS ARHS" is true. Otherwise, return None. static Optional<bool> -isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS, - Value *BLHS, Value *BRHS, const DataLayout &DL, +isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, + const Value *ARHS, const Value *BLHS, + const Value *BRHS, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { switch (Pred) { @@ -4045,7 +4306,8 @@ isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS, /// Return true if the operands of the two compares match. IsSwappedOps is true /// when the operands match, but are swapped. 
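The constant-operand case handled by isImpliedCondMatchingImmOperands a little further down reduces to a containment test on constant ranges. A self-contained sketch of that reasoning, assuming LLVM's ConstantRange API; the constants 5 and 3 are arbitrary and the return codes are only for the example.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

int main() {
  APInt Five(32, 5), Three(32, 3);
  // All values satisfying "x >s 5".
  ConstantRange DomCR =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_SGT, Five);
  // All values for which "x >s 3" can still be true.
  ConstantRange CR = ConstantRange::makeAllowedICmpRegion(
      CmpInst::ICMP_SGT, ConstantRange(Three));
  if (CR.contains(DomCR))
    return 0; // the first compare implies the second is true
  if (CR.inverse().contains(DomCR))
    return 1; // the first compare implies the second is false
  return 2;   // nothing can be inferred
}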
-static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS, +static bool isMatchingOps(const Value *ALHS, const Value *ARHS, + const Value *BLHS, const Value *BRHS, bool &IsSwappedOps) { bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS); @@ -4057,9 +4319,11 @@ static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS, /// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS /// BRHS" is false. Otherwise, return None if we can't infer anything. static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred, - Value *ALHS, Value *ARHS, + const Value *ALHS, + const Value *ARHS, CmpInst::Predicate BPred, - Value *BLHS, Value *BRHS, + const Value *BLHS, + const Value *BRHS, bool IsSwappedOps) { // Canonicalize the operands so they're matching. if (IsSwappedOps) { @@ -4078,9 +4342,10 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred, /// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS /// C2" is false. Otherwise, return None if we can't infer anything. static Optional<bool> -isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS, - ConstantInt *C1, CmpInst::Predicate BPred, - Value *BLHS, ConstantInt *C2) { +isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, + const ConstantInt *C1, + CmpInst::Predicate BPred, + const Value *BLHS, const ConstantInt *C2) { assert(ALHS == BLHS && "LHS operands must match."); ConstantRange DomCR = ConstantRange::makeExactICmpRegion(APred, C1->getValue()); @@ -4095,7 +4360,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS, return None; } -Optional<bool> llvm::isImpliedCondition(Value *LHS, Value *RHS, +Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool InvertAPred, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp index 53e7153..7e598f4 100644 --- a/contrib/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp @@ -107,11 +107,11 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { // Find the type we're currently indexing into. gep_type_iterator GEPTI = gep_type_begin(Gep); - std::advance(GEPTI, LastOperand - 1); + std::advance(GEPTI, LastOperand - 2); // If it's a type with the same allocation size as the result of the GEP we // can peel off the zero index. 
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) + if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize) break; --LastOperand; } @@ -454,9 +454,10 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; I0->getAllMetadataOtherThanDebugLoc(Metadata); - for (auto Kind : { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_fpmath, - LLVMContext::MD_nontemporal }) { + for (auto Kind : + {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_fpmath, + LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) { MDNode *MD = I0->getMetadata(Kind); for (int J = 1, E = VL.size(); MD && J != E; ++J) { @@ -469,13 +470,12 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { case LLVMContext::MD_alias_scope: MD = MDNode::getMostGenericAliasScope(MD, IMD); break; - case LLVMContext::MD_noalias: - MD = MDNode::intersect(MD, IMD); - break; case LLVMContext::MD_fpmath: MD = MDNode::getMostGenericFPMath(MD, IMD); break; + case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: + case LLVMContext::MD_invariant_load: MD = MDNode::intersect(MD, IMD); break; default: |
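The new matchMinMax() logic in the ValueTracking.cpp hunks above leans on a few bit-level identities, e.g. that a signed compare against 0 (or -1) selecting between a value and the signed-max (or signed-min) constant behaves exactly like an unsigned max/min. The following is an illustrative, self-contained sanity check of two of those identities over all 8-bit values; it is not part of the patch, and it deliberately uses plain C++ rather than LLVM's PatternMatch API (all names below are ours, not LLVM's).

// Exhaustively verify two identities used by matchMinMax() above, for i8:
//   (X <s 0)  ? X : 0x7f  ==  UMAX(X, 0x7f)   (0x7f is INT8_MAX)
//   (X >s -1) ? 0x80 : X  ==  UMAX(X, 0x80)   (0x80 is INT8_MIN's bit pattern)
// Standalone sketch only; not LLVM code.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    int8_t S = static_cast<int8_t>(V);   // signed view of the byte
    uint8_t U = static_cast<uint8_t>(V); // unsigned view of the same byte

    // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
    uint8_t SelA = (S < 0) ? U : uint8_t(0x7f);
    assert(SelA == std::max<uint8_t>(U, 0x7f));

    // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
    uint8_t SelB = (S > -1) ? uint8_t(0x80) : U;
    assert(SelB == std::max<uint8_t>(U, 0x80));
  }
  return 0;
}

The UMIN forms noted in the patch comments follow by swapping the select arms; the check above covers only the UMAX cases.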