| author | dim <dim@FreeBSD.org> | 2015-12-30 13:13:10 +0000 |
| committer | dim <dim@FreeBSD.org> | 2015-12-30 13:13:10 +0000 |
| commit | 9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a (patch) |
| tree | b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Analysis |
| parent | f09a28d1de99fda4f5517fb12670fc36552f4927 (diff) |
| parent | e194cd6d03d91631334d9d5e55b506036f423cc8 (diff) |
| download | FreeBSD-src-9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a.zip FreeBSD-src-9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a.tar.gz |
Update llvm to trunk r256633.
Diffstat (limited to 'contrib/llvm/lib/Analysis')
61 files changed, 9542 insertions, 5770 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 44d137d..35f2e97 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -25,9 +25,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -40,44 +47,72 @@ #include "llvm/Pass.h" using namespace llvm; -// Register the AliasAnalysis interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) -char AliasAnalysis::ID = 0; +/// Allow disabling BasicAA from the AA results. This is particularly useful +/// when testing to isolate a single AA implementation. +static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden, + cl::init(false)); + +AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) { + for (auto &AA : AAs) + AA->setAAResults(this); +} + +AAResults &AAResults::operator=(AAResults &&Arg) { + AAs = std::move(Arg.AAs); + for (auto &AA : AAs) + AA->setAAResults(this); + return *this; +} + +AAResults::~AAResults() { +// FIXME; It would be nice to at least clear out the pointers back to this +// aggregation here, but we end up with non-nesting lifetimes in the legacy +// pass manager that prevent this from working. In the legacy pass manager +// we'll end up with dangling references here in some cases. 
+#if 0 + for (auto &AA : AAs) + AA->setAAResults(nullptr); +#endif +} //===----------------------------------------------------------------------===// // Default chaining methods //===----------------------------------------------------------------------===// -AliasResult AliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->alias(LocA, LocB); +AliasResult AAResults::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + for (const auto &AA : AAs) { + auto Result = AA->alias(LocA, LocB); + if (Result != MayAlias) + return Result; + } + return MayAlias; } -bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->pointsToConstantMemory(Loc, OrLocal); -} +bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { + for (const auto &AA : AAs) + if (AA->pointsToConstantMemory(Loc, OrLocal)) + return true; -AliasAnalysis::ModRefResult -AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getArgModRefInfo(CS, ArgIdx); + return false; } -void AliasAnalysis::deleteValue(Value *V) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - AA->deleteValue(V); -} +ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { + ModRefInfo Result = MRI_ModRef; + + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx)); -void AliasAnalysis::addEscapingUse(Use &U) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - AA->addEscapingUse(U); + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; + } + + return Result; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { +ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // We may have two calls if (auto CS = ImmutableCallSite(I)) { // Check if the two calls modify the same memory @@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // is that if the call references what this instruction // defines, it must be clobbered by this location. 
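The rewritten `AAResults::alias` in this hunk replaces the old chain-to-the-next-AA scheme with a loop over the registered implementations, returning the first answer more precise than `MayAlias`. A standalone sketch of that first-refinement pattern, with simplified stand-in types rather than the real LLVM classes:

```c++
#include <memory>
#include <vector>

// Stand-ins for LLVM's AliasResult lattice and the AA "concept"; shapes are
// simplified for illustration, not the actual LLVM 3.8 API.
enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

struct AAConcept {
  virtual ~AAConcept() = default;
  virtual AliasResult alias(const void *A, const void *B) = 0;
};

struct Aggregation {
  std::vector<std::unique_ptr<AAConcept>> AAs;

  // Ask each implementation in turn; the first result more precise than
  // MayAlias wins, otherwise fall back to the conservative MayAlias.
  AliasResult alias(const void *A, const void *B) {
    for (auto &AA : AAs) {
      AliasResult R = AA->alias(A, B);
      if (R != MayAlias)
        return R;
    }
    return MayAlias;
  }
};
```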
const MemoryLocation DefLoc = MemoryLocation::get(I); - if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) - return AliasAnalysis::ModRef; - } - return AliasAnalysis::NoModRef; -} - -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - - ModRefBehavior MRB = getModRefBehavior(CS); - if (MRB == DoesNotAccessMemory) - return NoModRef; - - ModRefResult Mask = ModRef; - if (onlyReadsMemory(MRB)) - Mask = Ref; - - if (onlyAccessesArgPointees(MRB)) { - bool doesAlias = false; - ModRefResult AllArgsMask = NoModRef; - if (doesAccessArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) { - const Value *Arg = *AI; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned ArgIdx = std::distance(CS.arg_begin(), AI); - MemoryLocation ArgLoc = - MemoryLocation::getForArgument(CS, ArgIdx, *TLI); - if (!isNoAlias(ArgLoc, Loc)) { - ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx); - doesAlias = true; - AllArgsMask = ModRefResult(AllArgsMask | ArgMask); - } - } - } - if (!doesAlias) - return NoModRef; - Mask = ModRefResult(Mask & AllArgsMask); + if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) + return MRI_ModRef; } + return MRI_NoModRef; +} - // If Loc is a constant memory location, the call definitely could not - // modify the memory location. - if ((Mask & Mod) && pointsToConstantMemory(Loc)) - Mask = ModRefResult(Mask & ~Mod); - - // If this is the end of the chain, don't forward. - if (!AA) return Mask; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); -} - -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - - // If CS1 or CS2 are readnone, they don't interact. - ModRefBehavior CS1B = getModRefBehavior(CS1); - if (CS1B == DoesNotAccessMemory) return NoModRef; - - ModRefBehavior CS2B = getModRefBehavior(CS2); - if (CS2B == DoesNotAccessMemory) return NoModRef; - - // If they both only read from memory, there is no dependence. - if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) - return NoModRef; - - AliasAnalysis::ModRefResult Mask = ModRef; - - // If CS1 only reads memory, the only dependence on CS2 can be - // from CS1 reading memory written by CS2. - if (onlyReadsMemory(CS1B)) - Mask = ModRefResult(Mask & Ref); - - // If CS2 only access memory through arguments, accumulate the mod/ref - // information from CS1's references to the memory referenced by - // CS2's arguments. - if (onlyAccessesArgPointees(CS2B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) { - for (ImmutableCallSite::arg_iterator - I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI); - - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of - // CS1 on that location is the inverse. 
- ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == Mod) - ArgMask = ModRef; - else if (ArgMask == Ref) - ArgMask = Mod; - - R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask); - if (R == Mask) - break; - } - } - return R; - } +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + ModRefInfo Result = MRI_ModRef; - // If CS1 only accesses memory through arguments, check if CS2 references - // any of the memory referenced by CS1's arguments. If not, return NoModRef. - if (onlyAccessesArgPointees(CS1B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) { - for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI); - - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. - ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || - ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef)) - R = ModRefResult((R | ArgMask) & Mask); - - if (R == Mask) - break; - } - } - return R; - } + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); - // If this is the end of the chain, don't forward. - if (!AA) return Mask; + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; + } - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); + return Result; } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + ModRefInfo Result = MRI_ModRef; + + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); + + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; + } + + return Result; +} - ModRefBehavior Min = UnknownModRefBehavior; +FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; - // Call back into the alias analysis with the other form of getModRefBehavior - // to see if it can give a better response. - if (const Function *F = CS.getCalledFunction()) - Min = getModRefBehavior(F); + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS)); - // If this is the end of the chain, don't forward. - if (!AA) return Min; + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; + } - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any result we've managed to compute. 
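The `getModRefInfo` overloads and `getModRefBehavior` above all use the same meet-over-implementations idiom: because the result enums are bitmasks, intersecting with `&` can only refine the answer, and the loop may stop as soon as it reaches the bottom of the lattice. A standalone sketch of that meet, using illustrative constants that mirror this snapshot's `MRI_*` values:

```c++
#include <initializer_list>

// Illustrative mirror of the ModRefInfo bitmask in this snapshot:
// MRI_NoModRef = 0, MRI_Ref = 1, MRI_Mod = 2, MRI_ModRef = 3.
enum ModRefInfo {
  MRI_NoModRef = 0,
  MRI_Ref = 1,
  MRI_Mod = 2,
  MRI_ModRef = MRI_Ref | MRI_Mod
};

// Meet (bitwise intersection) over several answers, with the early exit used
// in the hunks above: once at the lattice bottom, no later query matters.
ModRefInfo meet(std::initializer_list<ModRefInfo> Answers) {
  ModRefInfo Result = MRI_ModRef; // start at the top: may mod and ref
  for (ModRefInfo A : Answers) {
    Result = ModRefInfo(Result & A); // drop bits some AA has ruled out
    if (Result == MRI_NoModRef)      // bottom of the lattice
      break;
  }
  return Result;
}
// e.g. meet({MRI_ModRef, MRI_Ref}) == MRI_Ref
//      meet({MRI_Ref, MRI_Mod})    == MRI_NoModRef
```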
- return ModRefBehavior(AA->getModRefBehavior(CS) & Min); + return Result; } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(const Function *F) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getModRefBehavior(F); +FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; + + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F)); + + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; + } + + return Result; } //===----------------------------------------------------------------------===// -// AliasAnalysis non-virtual helper method implementation +// Helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const LoadInst *L, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!L->isUnordered()) - return ModRef; + return MRI_ModRef; // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) - return NoModRef; + return MRI_NoModRef; // Otherwise, a load just reads. - return Ref; + return MRI_Ref; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!S->isUnordered()) - return ModRef; + return MRI_ModRef; if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. if (!alias(MemoryLocation::get(S), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. if (pointsToConstantMemory(Loc)) - return NoModRef; - + return MRI_NoModRef; } // Otherwise, a store just writes. - return Mod; + return MRI_Mod; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, + const MemoryLocation &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. if (!alias(MemoryLocation::get(V), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. if (pointsToConstantMemory(Loc)) - return NoModRef; + return MRI_NoModRef; } // Otherwise, a va_arg reads and writes. - return ModRef; + return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchpad reads and writes. 
+ return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchret reads and writes. + return MRI_ModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, - const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, + const MemoryLocation &Loc) { // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. if (CX->getSuccessOrdering() > Monotonic) - return ModRef; + return MRI_ModRef; // If the cmpxchg address does not alias the location, it does not access it. if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) - return NoModRef; + return MRI_NoModRef; - return ModRef; + return MRI_ModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, - const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, + const MemoryLocation &Loc) { // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. if (RMW->getOrdering() > Monotonic) - return ModRef; + return MRI_ModRef; // If the atomicrmw address does not alias the location, it does not access it. if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) - return NoModRef; + return MRI_NoModRef; - return ModRef; + return MRI_ModRef; } -// FIXME: this is really just shoring-up a deficiency in alias analysis. -// BasicAA isn't willing to spend linear time determining whether an alloca -// was captured before or after this particular call, while we are. However, -// with a smarter AA in place, this test is just wasting compile time. -AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore( - const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) { +/// \brief Return information about whether a particular call site modifies +/// or reads the specified memory location \p MemLoc before instruction \p I +/// in a BasicBlock. A ordered basic block \p OBB can be used to speed up +/// instruction-ordering queries inside the BasicBlock containing \p I. +/// FIXME: this is really just shoring-up a deficiency in alias analysis. +/// BasicAA isn't willing to spend linear time determining whether an alloca +/// was captured before or after this particular call, while we are. However, +/// with a smarter AA in place, this test is just wasting compile time. 
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, + DominatorTree *DT, + OrderedBasicBlock *OBB) { if (!DT) - return AliasAnalysis::ModRef; + return MRI_ModRef; - const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL); + const Value *Object = + GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout()); if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) || isa<Constant>(Object)) - return AliasAnalysis::ModRef; + return MRI_ModRef; ImmutableCallSite CS(I); if (!CS.getInstruction() || CS.getInstruction() == Object) - return AliasAnalysis::ModRef; + return MRI_ModRef; if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, /* StoreCaptures */ true, I, DT, - /* include Object */ true)) - return AliasAnalysis::ModRef; + /* include Object */ true, + /* OrderedBasicBlock */ OBB)) + return MRI_ModRef; unsigned ArgNo = 0; - AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef; + ModRefInfo R = MRI_NoModRef; for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture or byval pointer arguments. If this @@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore( if (CS.doesNotAccessMemory(ArgNo)) continue; if (CS.onlyReadsMemory(ArgNo)) { - R = AliasAnalysis::Ref; + R = MRI_Ref; continue; } - return AliasAnalysis::ModRef; + return MRI_ModRef; } return R; } -// AliasAnalysis destructor: DO NOT move this to the header file for -// AliasAnalysis or else clients of the AliasAnalysis class may not depend on -// the AliasAnalysis.o file in the current .a file, causing alias analysis -// support to not be included in the tool correctly! -// -AliasAnalysis::~AliasAnalysis() {} - -/// InitializeAliasAnalysis - Subclasses must call this method to initialize the -/// AliasAnalysis interface before any other methods are called. -/// -void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) { - DL = NewDL; - auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &P->getAnalysis<AliasAnalysis>(); -} - -// getAnalysisUsage - All alias analysis implementations should invoke this -// directly (using AliasAnalysis::getAnalysisUsage(AU)). -void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); // All AA's chain -} - -/// getTypeStoreSize - Return the DataLayout store size for the given type, -/// if known, or a conservative value otherwise. -/// -uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) { - return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize; -} - /// canBasicBlockModify - Return true if it is possible for execution of the /// specified basic block to modify the location Loc. /// -bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, - const MemoryLocation &Loc) { - return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod); +bool AAResults::canBasicBlockModify(const BasicBlock &BB, + const MemoryLocation &Loc) { + return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod); } /// canInstructionRangeModRef - Return true if it is possible for the @@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, /// mode) the location Loc. The instructions to consider are all /// of the instructions in the range of [I1,I2] INCLUSIVE. /// I1 and I2 must be in the same basic block. 
-bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1, - const Instruction &I2, - const MemoryLocation &Loc, - const ModRefResult Mode) { +bool AAResults::canInstructionRangeModRef(const Instruction &I1, + const Instruction &I2, + const MemoryLocation &Loc, + const ModRefInfo Mode) { assert(I1.getParent() == I2.getParent() && "Instructions not in same basic block!"); - BasicBlock::const_iterator I = &I1; - BasicBlock::const_iterator E = &I2; + BasicBlock::const_iterator I = I1.getIterator(); + BasicBlock::const_iterator E = I2.getIterator(); ++E; // Convert from inclusive to exclusive range. for (; I != E; ++I) // Check every instruction in range - if (getModRefInfo(I, Loc) & Mode) + if (getModRefInfo(&*I, Loc) & Mode) return true; return false; } +// Provide a definition for the root virtual destructor. +AAResults::Concept::~Concept() {} + +namespace { +/// A wrapper pass for external alias analyses. This just squirrels away the +/// callback used to run any analyses and register their results. +struct ExternalAAWrapperPass : ImmutablePass { + typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT; + + CallbackT CB; + + static char ID; + + ExternalAAWrapperPass() : ImmutablePass(ID) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + explicit ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; +} + +char ExternalAAWrapperPass::ID = 0; +INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis", + false, true) + +ImmutablePass * +llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) { + return new ExternalAAWrapperPass(std::move(Callback)); +} + +AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) { + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char AAResultsWrapperPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa", + "Function Alias Analysis Results", false, true) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass) +INITIALIZE_PASS_END(AAResultsWrapperPass, "aa", + "Function Alias Analysis Results", false, true) + +FunctionPass *llvm::createAAResultsWrapperPass() { + return new AAResultsWrapperPass(); +} + +/// Run the wrapper pass to rebuild an aggregation over known AA passes. +/// +/// This is the legacy pass manager's interface to the new-style AA results +/// aggregation object. Because this is somewhat shoe-horned into the legacy +/// pass manager, we hard code all the specific alias analyses available into +/// it. While the particular set enabled is configured via commandline flags, +/// adding a new alias analysis to LLVM will require adding support for it to +/// this list. +bool AAResultsWrapperPass::runOnFunction(Function &F) { + // NB! This *must* be reset before adding new AA results to the new + // AAResults object because in the legacy pass manager, each instance + // of these will refer to the *same* immutable analyses, registering and + // unregistering themselves with them. 
We need to carefully tear down the + // previous object first, in this case replacing it with an empty one, before + // registering new results. + AAR.reset(new AAResults()); + + // BasicAA is always available for function analyses. Also, we add it first + // so that it can trump TBAA results when it proves MustAlias. + // FIXME: TBAA should have an explicit mode to support this and then we + // should reconsider the ordering here. + if (!DisableBasicAA) + AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult()); + + // Populate the results with the currently available AAs. + if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = + getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>()) + AAR->addAAResult(WrapperPass->getResult()); + + // If available, run an external AA providing callback over the results as + // well. + if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>()) + if (WrapperPass->CB) + WrapperPass->CB(*this, F, *AAR); + + // Analyses don't mutate the IR, so return false. + return false; +} + +void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<BasicAAWrapperPass>(); + + // We also need to mark all the alias analysis passes we will potentially + // probe in runOnFunction as used here to ensure the legacy pass manager + // preserves them. This hard coding of lists of alias analyses is specific to + // the legacy pass manager. + AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>(); + AU.addUsedIfAvailable<TypeBasedAAWrapperPass>(); + AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>(); + AU.addUsedIfAvailable<GlobalsAAWrapperPass>(); + AU.addUsedIfAvailable<SCEVAAWrapperPass>(); + AU.addUsedIfAvailable<CFLAAWrapperPass>(); +} + +AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, + BasicAAResult &BAR) { + AAResults AAR; + + // Add in our explicitly constructed BasicAA results. + if (!DisableBasicAA) + AAR.addAAResult(BAR); + + // Populate the results with the other currently available AAs. + if (auto *WrapperPass = + P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = + P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>()) + AAR.addAAResult(WrapperPass->getResult()); + + return AAR; +} + /// isNoAliasCall - Return true if this pointer is returned by a noalias /// function. 
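For legacy-pass-manager clients, the hunks above establish the pattern of depending on `AAResultsWrapperPass` and pulling the aggregation out of it, as the `aa-eval` and alias-set changes later in this commit do. A hedged sketch of a downstream `FunctionPass` written in the post-r256633 style (the pass itself is hypothetical; registration boilerplate is omitted):

```c++
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct MyAAClient : FunctionPass {
  static char ID;
  MyAAClient() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // Post-change idiom: fetch the aggregated results instead of the old
    // getAnalysis<AliasAnalysis>().
    AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // ... issue AA.alias(...) / AA.getModRefInfo(...) queries ...
    return false; // analysis-only, IR unchanged
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesAll();
  }
};
}

char MyAAClient::ID = 0;
```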
bool llvm::isNoAliasCall(const Value *V) { - if (isa<CallInst>(V) || isa<InvokeInst>(V)) - return ImmutableCallSite(cast<Instruction>(V)) - .paramHasAttr(0, Attribute::NoAlias); + if (auto CS = ImmutableCallSite(V)) + return CS.paramHasAttr(0, Attribute::NoAlias); return false; } diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp deleted file mode 100644 index 9b6a5a4..0000000 --- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp +++ /dev/null @@ -1,173 +0,0 @@ -//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass which can be used to count how many alias queries -// are being made and how the alias analysis implementation being used responds. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt<bool> -PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); -static cl::opt<bool> -PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); - -namespace { - class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { - unsigned No, May, Partial, Must; - unsigned NoMR, JustRef, JustMod, MR; - Module *M; - public: - static char ID; // Class identification, replacement for typeinfo - AliasAnalysisCounter() : ModulePass(ID) { - initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); - No = May = Partial = Must = 0; - NoMR = JustRef = JustMod = MR = 0; - } - - void printLine(const char *Desc, unsigned Val, unsigned Sum) { - errs() << " " << Val << " " << Desc << " responses (" - << Val*100/Sum << "%)\n"; - } - ~AliasAnalysisCounter() override { - unsigned AASum = No+May+Partial+Must; - unsigned MRSum = NoMR+JustRef+JustMod+MR; - if (AASum + MRSum) { // Print a report if any counted queries occurred... 
- errs() << "\n===== Alias Analysis Counter Report =====\n" - << " Analysis counted:\n" - << " " << AASum << " Total Alias Queries Performed\n"; - if (AASum) { - printLine("no alias", No, AASum); - printLine("may alias", May, AASum); - printLine("partial alias", Partial, AASum); - printLine("must alias", Must, AASum); - errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" - << May*100/AASum << "%/" - << Partial*100/AASum << "%/" - << Must*100/AASum<<"%\n\n"; - } - - errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; - if (MRSum) { - printLine("no mod/ref", NoMR, MRSum); - printLine("ref", JustRef, MRSum); - printLine("mod", JustMod, MRSum); - printLine("mod/ref", MR, MRSum); - errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum - << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum - << "%/" << MR*100/MRSum <<"%\n\n"; - } - } - } - - bool runOnModule(Module &M) override { - this->M = &M; - InitializeAliasAnalysis(this, &M.getDataLayout()); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<AliasAnalysis>(); - AU.setPreservesAll(); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - // FIXME: We could count these too... - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal); - } - - // Forwarding functions: just delegate to a real AA implementation, counting - // the number of responses... 
- AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1,CS2); - } - }; -} - -char AliasAnalysisCounter::ID = 0; -INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", - "Count Alias Analysis Query Responses", false, true, false) - -ModulePass *llvm::createAliasAnalysisCounterPass() { - return new AliasAnalysisCounter(); -} - -AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); - - const char *AliasString = nullptr; - switch (R) { - case NoAlias: No++; AliasString = "No alias"; break; - case MayAlias: May++; AliasString = "May alias"; break; - case PartialAlias: Partial++; AliasString = "Partial alias"; break; - case MustAlias: Must++; AliasString = "Must alias"; break; - } - - if (PrintAll || (PrintAllFailures && R == MayAlias)) { - errs() << AliasString << ":\t"; - errs() << "[" << LocA.Size << "B] "; - LocA.Ptr->printAsOperand(errs(), true, M); - errs() << ", "; - errs() << "[" << LocB.Size << "B] "; - LocB.Ptr->printAsOperand(errs(), true, M); - errs() << "\n"; - } - - return R; -} - -AliasAnalysis::ModRefResult -AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); - - const char *MRString = nullptr; - switch (R) { - case NoModRef: NoMR++; MRString = "NoModRef"; break; - case Ref: JustRef++; MRString = "JustRef"; break; - case Mod: JustMod++; MRString = "JustMod"; break; - case ModRef: MR++; MRString = "ModRef"; break; - } - - if (PrintAll || (PrintAllFailures && R == ModRef)) { - errs() << MRString << ": Ptr: "; - errs() << "[" << Loc.Size << "B] "; - Loc.Ptr->printAsOperand(errs(), true, M); - errs() << "\t<->" << *CS.getInstruction() << '\n'; - } - return R; -} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 5d1b001..12917b6 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -21,8 +21,10 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -57,7 +59,7 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.setPreservesAll(); } @@ -81,7 +83,7 @@ namespace { char AAEval::ID = 0; INITIALIZE_PASS_BEGIN(AAEval, "aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AAEval, "aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true) @@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) { } bool AAEval::runOnFunction(Function &F) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + const DataLayout &DL = F.getParent()->getDataLayout(); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); SetVector<Value *> 
Pointers; - SetVector<CallSite> CallSites; + SmallSetVector<CallSite, 16> CallSites; SetVector<Value *> Loads; SetVector<Value *> Stores; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) // Add all pointer arguments. - Pointers.insert(I); + for (auto &I : F.args()) + if (I.getType()->isPointerTy()) // Add all pointer arguments. + Pointers.insert(&I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. @@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) { if (!isa<Function>(Callee) && isInterestingPointer(Callee)) Pointers.insert(Callee); // Consider formals. - for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) - if (isInterestingPointer(*AI)) - Pointers.insert(*AI); + for (Use &DataOp : CS.data_ops()) + if (isInterestingPointer(DataOp)) + Pointers.insert(DataOp); CallSites.insert(CS); } else { // Consider all operands. @@ -188,12 +190,12 @@ bool AAEval::runOnFunction(Function &F) { I1 != E; ++I1) { uint64_t I1Size = MemoryLocation::UnknownSize; Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); - if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); + if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy); for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { uint64_t I2Size = MemoryLocation::UnknownSize; Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); - if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); + if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy); switch (AA.alias(*I1, I1Size, *I2, I2Size)) { case NoAlias: @@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) { } // Mod/ref alias analysis: compare all pairs of calls and values - for (SetVector<CallSite>::iterator C = CallSites.begin(), - Ce = CallSites.end(); C != Ce; ++C) { + for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { Instruction *I = C->getInstruction(); for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); V != Ve; ++V) { uint64_t Size = MemoryLocation::UnknownSize; Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); - if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); + if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(*C, *V, Size)) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); ++NoModRefCount; break; - case AliasAnalysis::Mod: + case MRI_Mod: PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); ++ModCount; break; - case AliasAnalysis::Ref: + case MRI_Ref: PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); ++RefCount; break; - case AliasAnalysis::ModRef: + case MRI_ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); ++ModRefCount; break; @@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) { } // Mod/ref alias analysis: compare all pairs of calls - for (SetVector<CallSite>::iterator C = CallSites.begin(), - Ce = CallSites.end(); C != Ce; ++C) { - for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { + for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { + for (auto D = CallSites.begin(); D != Ce; ++D) { if (D == C) continue; switch (AA.getModRefInfo(*C, *D)) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, 
F.getParent()); ++NoModRefCount; break; - case AliasAnalysis::Mod: + case MRI_Mod: PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); ++ModCount; break; - case AliasAnalysis::Ref: + case MRI_Ref: PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); ++RefCount; break; - case AliasAnalysis::ModRef: + case MRI_ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRefCount; break; diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp deleted file mode 100644 index e5107b3..0000000 --- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass checks alias analysis users to ensure that if they -// create a new value, they do not query AA without informing it of the value. -// It acts as a shim over any other AA pass you want. -// -// Yes keeping track of every value in the program is expensive, but this is -// a debugging pass. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include <set> -using namespace llvm; - -namespace { - - class AliasDebugger : public ModulePass, public AliasAnalysis { - - //What we do is simple. Keep track of every value the AA could - //know about, and verify that queries are one of those. - //A query to a value that didn't exist when the AA was created - //means someone forgot to update the AA when creating new values - - std::set<const Value*> Vals; - - public: - static char ID; // Class identification, replacement for typeinfo - AliasDebugger() : ModulePass(ID) { - initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class - - for(Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - Vals.insert(&*I); - for (User::const_op_iterator OI = I->op_begin(), - OE = I->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - - for(Module::iterator I = M.begin(), - E = M.end(); I != E; ++I){ - Vals.insert(&*I); - if(!I->isDeclaration()) { - for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); - AI != AE; ++AI) - Vals.insert(&*AI); - for (Function::const_iterator FI = I->begin(), FE = I->end(); - FI != FE; ++FI) - for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - Vals.insert(&*BI); - for (User::const_op_iterator OI = BI->op_begin(), - OE = BI->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - } - - } - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. 
- void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - assert(Vals.find(LocA.Ptr) != Vals.end() && - "Never seen value in AA before"); - assert(Vals.find(LocB.Ptr) != Vals.end() && - "Never seen value in AA before"); - return AliasAnalysis::alias(LocA, LocB); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::getModRefInfo(CS, Loc); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1,CS2); - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - } - - void deleteValue(Value *V) override { - assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); - AliasAnalysis::deleteValue(V); - } - - }; -} - -char AliasDebugger::ID = 0; -INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", - "AA use debugger", false, true, false) - -Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } - diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 54d0f43..3094049 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (AA.getModRefInfo(UnknownInsts[i], - MemoryLocation(Ptr, Size, AAInfo)) != - AliasAnalysis::NoModRef) + MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef) return true; } @@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { ImmutableCallSite C1(getUnknownInst(i)), C2(Inst); - if (!C1 || !C2 || - AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || - AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) + if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || + AA.getModRefInfo(C2, C1) != MRI_NoModRef) return true; } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo( - Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) != - AliasAnalysis::NoModRef) + if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), + I.getAAInfo())) != MRI_NoModRef) return true; return false; @@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; if (!FoundSet) { // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. 
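`findAliasSetForPointer` in the AliasSetTracker hunk above is a find-or-merge scan: remember the first set the pointer can join, then fold every later matching set into it. The same idiom in isolation, with a toy set type standing in for `AliasSet` (purely illustrative):

```c++
#include <algorithm>
#include <list>
#include <vector>

// Toy stand-in for an alias set, keyed by ints instead of pointers.
struct Set {
  std::vector<int> Members;
  bool contains(int X) const {
    return std::find(Members.begin(), Members.end(), X) != Members.end();
  }
};

// Scan all sets: the first match is remembered; later matches are merged
// into it, mirroring the FoundSet / mergeSetIn structure above.
Set *findOrMerge(std::list<Set> &Sets, int X) {
  Set *Found = nullptr;
  for (Set &Cur : Sets) {
    if (!Cur.contains(X))
      continue;
    if (!Found) {
      Found = &Cur; // first set X can go into
    } else {
      Found->Members.insert(Found->Members.end(), Cur.Members.begin(),
                            Cur.Members.end()); // merge in contents
      Cur.Members.clear();
    }
  }
  return Found;
}
```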
} @@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA)) continue; if (!FoundSet) // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. else if (!Cur->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. } @@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) { AliasSet::AccessLattice Access = AliasSet::RefAccess; bool NewPtr; + const DataLayout &DL = LI->getModule()->getDataLayout(); AliasSet &AS = addPointer(LI->getOperand(0), - AA.getTypeStoreSize(LI->getType()), + DL.getTypeStoreSize(LI->getType()), AAInfo, Access, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) { AliasSet::AccessLattice Access = AliasSet::ModAccess; bool NewPtr; + const DataLayout &DL = SI->getModule()->getDataLayout(); Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), - AA.getTypeStoreSize(Val->getType()), + DL.getTypeStoreSize(Val->getType()), AAInfo, Access, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) { } void AliasSetTracker::add(BasicBlock &BB) { - for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - add(I); + for (auto &I : BB) + add(&I); } void AliasSetTracker::add(const AliasSetTracker &AST) { @@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { } bool AliasSetTracker::remove(LoadInst *LI) { - uint64_t Size = AA.getTypeStoreSize(LI->getType()); + const DataLayout &DL = LI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); @@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) { } bool AliasSetTracker::remove(StoreInst *SI) { - uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + const DataLayout &DL = SI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType()); AAMDNodes AAInfo; SI->getAAMetadata(AAInfo); @@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) { // dangling pointers to deleted instructions. // void AliasSetTracker::deleteValue(Value *PtrVal) { - // Notify the alias analysis implementation that this value is gone. - AA.deleteValue(PtrVal); - // If this is a call instruction, remove the callsite from the appropriate // AliasSet (if present). 
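Several hunks in this file swap `AA.getTypeStoreSize(...)` for a direct `DataLayout` query reached through the instruction's module, matching the removal of `AliasAnalysis::getTypeStoreSize`. A minimal sketch of the replacement lookup for a load (the helper name is illustrative):

```c++
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Bytes written when storing a value of LI's loaded type, fetched from the
// owning module's DataLayout -- the post-change idiom used in this commit.
static uint64_t loadedTypeStoreSize(const LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}
```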
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) { @@ -650,11 +649,12 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); } bool runOnFunction(Function &F) override { - Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>()); + auto &AAWP = getAnalysis<AAResultsWrapperPass>(); + Tracker = new AliasSetTracker(AAWP.getAAResults()); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) Tracker->add(&*I); @@ -668,6 +668,6 @@ namespace { char AliasSetPrinter::ID = 0; INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", "Alias Set Printer", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets", "Alias Set Printer", false, true) diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 842ff0a..9c1ac00 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -20,23 +20,23 @@ using namespace llvm; /// initializeAnalysis - Initialize all passes linked into the Analysis library. void llvm::initializeAnalysis(PassRegistry &Registry) { - initializeAliasAnalysisAnalysisGroup(Registry); - initializeAliasAnalysisCounterPass(Registry); initializeAAEvalPass(Registry); - initializeAliasDebuggerPass(Registry); initializeAliasSetPrinterPass(Registry); - initializeNoAAPass(Registry); - initializeBasicAliasAnalysisPass(Registry); - initializeBlockFrequencyInfoPass(Registry); - initializeBranchProbabilityInfoPass(Registry); + initializeBasicAAWrapperPassPass(Registry); + initializeBlockFrequencyInfoWrapperPassPass(Registry); + initializeBranchProbabilityInfoWrapperPassPass(Registry); + initializeCallGraphWrapperPassPass(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeCostModelAnalysisPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); - initializeCFLAliasAnalysisPass(Registry); + initializeCFLAAWrapperPassPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDelinearizationPass(Registry); + initializeDemandedBitsPass(Registry); initializeDivergenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); @@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePostDomPrinterPass(Registry); initializePostDomOnlyViewerPass(Registry); initializePostDomOnlyPrinterPass(Registry); + initializeAAResultsWrapperPassPass(Registry); + initializeGlobalsAAWrapperPassPass(Registry); initializeIVUsersPass(Registry); initializeInstCountPass(Registry); initializeIntervalPartitionPass(Registry); initializeLazyValueInfoPass(Registry); - initializeLibCallAliasAnalysisPass(Registry); initializeLintPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); + initializeObjCARCAAWrapperPassPass(Registry); initializePostDominatorTreePass(Registry); initializeRegionInfoPassPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); initializeRegionOnlyViewerPass(Registry); initializeRegionOnlyPrinterPass(Registry); - 
initializeScalarEvolutionPass(Registry); - initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeSCEVAAWrapperPassPass(Registry); + initializeScalarEvolutionWrapperPassPass(Registry); initializeTargetTransformInfoWrapperPassPass(Registry); - initializeTypeBasedAliasAnalysisPass(Registry); - initializeScopedNoAliasAAPass(Registry); + initializeTypeBasedAAWrapperPassPass(Registry); + initializeScopedNoAliasAAWrapperPassPass(Registry); } void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { initializeAnalysis(*unwrap(R)); } +void LLVMInitializeIPA(LLVMPassRegistryRef R) { + initializeAnalysis(*unwrap(R)); +} + LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, char **OutMessages) { raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3586354..00f346e 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -13,24 +13,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -42,6 +39,18 @@ #include <algorithm> using namespace llvm; +/// Enable analysis of recursive PHI nodes. +static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden, + cl::init(false)); + +/// SearchLimitReached / SearchTimes shows how often the limit of +/// to decompose GEPs is reached. It will affect the precision +/// of basic alias analysis. +#define DEBUG_TYPE "basicaa" +STATISTIC(SearchLimitReached, "Number of times the limit to " + "decompose GEPs is reached"); +STATISTIC(SearchTimes, "Number of times a GEP is decomposed"); + /// Cutoff after which to stop analysing a set of phi nodes potentially involved /// in a cycle. Because we are analysing 'through' phi nodes we need to be /// careful with value equivalence. We use reachability to make sure a value @@ -57,8 +66,8 @@ static const unsigned MaxLookupSearchDepth = 6; // Useful predicates //===----------------------------------------------------------------------===// -/// isNonEscapingLocalObject - Return true if the pointer is to a function-local -/// object that never escapes from the function. +/// Returns true if the pointer is to a function-local object that never +/// escapes from the function. static bool isNonEscapingLocalObject(const Value *V) { // If this is a local allocation, check to see if it escapes. 
if (isa<AllocaInst>(V) || isNoAliasCall(V)) @@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) { return false; } -/// isEscapeSource - Return true if the pointer is one which would have -/// been considered an escape by isNonEscapingLocalObject. +/// Returns true if the pointer is one which would have been considered an +/// escape by isNonEscapingLocalObject. static bool isEscapeSource(const Value *V) { if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V)) return true; @@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) { return false; } -/// getObjectSize - Return the size of the object specified by V, or -/// UnknownSize if unknown. +/// Returns the size of the object specified by V, or UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { @@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL, return MemoryLocation::UnknownSize; } -/// isObjectSmallerThan - Return true if we can prove that the object specified -/// by V is smaller than Size. +/// Returns true if we can prove that the object specified by V is smaller than +/// Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, const DataLayout &DL, const TargetLibraryInfo &TLI) { @@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. - uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true); + uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true); return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } -/// isObjectSize - Return true if we can prove that the object specified -/// by V has size Size. -static bool isObjectSize(const Value *V, uint64_t Size, - const DataLayout &DL, const TargetLibraryInfo &TLI) { +/// Returns true if we can prove that the object specified by V has size Size. +static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, + const TargetLibraryInfo &TLI) { uint64_t ObjectSize = getObjectSize(V, DL, TLI); return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size; } @@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size, // GetElementPtr Instruction Decomposition and Analysis //===----------------------------------------------------------------------===// -namespace { - enum ExtensionKind { - EK_NotExtended, - EK_SignExt, - EK_ZeroExt - }; - - struct VariableGEPIndex { - const Value *V; - ExtensionKind Extension; - int64_t Scale; - - bool operator==(const VariableGEPIndex &Other) const { - return V == Other.V && Extension == Other.Extension && - Scale == Other.Scale; - } - - bool operator!=(const VariableGEPIndex &Other) const { - return !operator==(Other); - } - }; -} - - -/// GetLinearExpression - Analyze the specified value as a linear expression: -/// "A*V + B", where A and B are constant integers. Return the scale and offset -/// values as APInts and return V as a Value*, and return whether we looked -/// through any sign or zero extends. The incoming Value is known to have -/// IntegerType and it may already be sign or zero extended. +/// Analyzes the specified value as a linear expression: "A*V + B", where A and +/// B are constant integers. 
+/// +/// Returns the scale and offset values as APInts, returns V as a Value*, and +/// returns whether we looked through any sign or zero extends. The incoming +/// Value is known to have IntegerType and it may already be sign or zero +/// extended. /// /// Note that this looks through extends, so the high bits may not be /// represented in the result. -static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, - ExtensionKind &Extension, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, DominatorTree *DT) { +/*static*/ const Value *BasicAAResult::GetLinearExpression( + const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, + unsigned &SExtBits, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, return V; } - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { + if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) { + // If it's a constant, just convert it to an offset and remove the variable. + // If we've been called recursively the Offset bit width will be greater + // than the constant's (the Offset's always as wide as the outermost call), + // so we'll zext here and process any extension in the isa<SExtInst> & + // isa<ZExtInst> cases below. + Offset += Const->getValue().zextOrSelf(Offset.getBitWidth()); + assert(Scale == 0 && "Constant values don't have a scale"); + return V; + } + + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + + // If we've been called recursively then Offset and Scale will be wider + // than the BOp operands. We'll always zext it here as we'll process sign + // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases). + APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth()); + switch (BOp->getOpcode()) { - default: break; + default: + // We don't understand this instruction, so we can't decompose it any + // further. + Scale = 1; + Offset = 0; + return V; case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, - BOp, DT)) - break; - // FALL THROUGH. + BOp, DT)) { + Scale = 1; + Offset = 0; + return V; + } + // FALL THROUGH.
case Instruction::Add: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset += RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset += RHS; + break; + case Instruction::Sub: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset -= RHS; + break; case Instruction::Mul: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset *= RHSC->getValue(); - Scale *= RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset *= RHS; + Scale *= RHS; + break; case Instruction::Shl: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset <<= RHSC->getValue().getLimitedValue(); - Scale <<= RHSC->getValue().getLimitedValue(); + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset <<= RHS.getLimitedValue(); + Scale <<= RHS.getLimitedValue(); + // The semantics of nsw and nuw for left shifts don't match those of + // multiplications, so we won't propagate them. + NSW = NUW = false; return V; } + + if (isa<OverflowingBinaryOperator>(BOp)) { + NUW &= BOp->hasNoUnsignedWrap(); + NSW &= BOp->hasNoSignedWrap(); + } + return V; } } // Since GEP indices are sign extended anyway, we don't care about the high // bits of a sign or zero extended value - just scales and offsets. The // extensions have to be consistent though. - if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) || - (isa<ZExtInst>(V) && Extension != EK_SignExt)) { + if (isa<SExtInst>(V) || isa<ZExtInst>(V)) { Value *CastOp = cast<CastInst>(V)->getOperand(0); - unsigned OldWidth = Scale.getBitWidth(); + unsigned NewWidth = V->getType()->getPrimitiveSizeInBits(); unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); - Scale = Scale.trunc(SmallWidth); - Offset = Offset.trunc(SmallWidth); - Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; - - Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL, - Depth + 1, AC, DT); - Scale = Scale.zext(OldWidth); - Offset = Offset.zext(OldWidth); + unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits; + const Value *Result = + GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL, + Depth + 1, AC, DT, NSW, NUW); + + // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this + // by just incrementing the number of bits we've extended by. + unsigned ExtendedBy = NewWidth - SmallWidth; + + if (isa<SExtInst>(V) && ZExtBits == 0) { + // sext(sext(%x, a), b) == sext(%x, a + b) + + if (NSW) { + // We haven't sign-wrapped, so it's valid to decompose sext(%x + c) + // into sext(%x) + sext(c).
We'll sext the Offset ourselves: + unsigned OldWidth = Offset.getBitWidth(); + Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth); + } else { + // We may have signed-wrapped, so don't decompose sext(%x + c) into + // sext(%x) + sext(c) + Scale = 1; + Offset = 0; + Result = CastOp; + ZExtBits = OldZExtBits; + SExtBits = OldSExtBits; + } + SExtBits += ExtendedBy; + } else { + // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b) + + if (!NUW) { + // We may have unsigned-wrapped, so don't decompose zext(%x + c) into + // zext(%x) + zext(c) + Scale = 1; + Offset = 0; + Result = CastOp; + ZExtBits = OldZExtBits; + SExtBits = OldSExtBits; + } + ZExtBits += ExtendedBy; + } return Result; } @@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, return V; } -/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it -/// into a base pointer with a constant offset and a number of scaled symbolic -/// offsets. +/// If V is a symbolic pointer expression, decompose it into a base pointer +/// with a constant offset and a number of scaled symbolic offsets. /// -/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in -/// the VarIndices vector) are Value*'s that are known to be scaled by the -/// specified amount, but which may have other unrepresented high bits. As such, -/// the gep cannot necessarily be reconstructed from its decomposed form. +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale +/// in the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As +/// such, the gep cannot necessarily be reconstructed from its decomposed form. /// /// When DataLayout is around, this function is capable of analyzing everything /// that GetUnderlyingObject can look through. To be able to do that /// GetUnderlyingObject and DecomposeGEPExpression must use the same search -/// depth (MaxLookupSearchDepth). -/// When DataLayout not is around, it just looks through pointer casts. -/// -static const Value * -DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, - SmallVectorImpl<VariableGEPIndex> &VarIndices, - bool &MaxLookupReached, const DataLayout &DL, - AssumptionCache *AC, DominatorTree *DT) { +/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks +/// through pointer casts. +/*static*/ const Value *BasicAAResult::DecomposeGEPExpression( + const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached, + const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) { // Limit recursion depth to limit compile time in crazy cases. unsigned MaxLookup = MaxLookupSearchDepth; MaxLookupReached = false; + SearchTimes++; BaseOffs = 0; do { @@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // updated when GetUnderlyingObject is updated). TLI should be // provided also. if (const Value *Simplified = - SimplifyInstruction(const_cast<Instruction *>(I), DL)) { + SimplifyInstruction(const_cast<Instruction *>(I), DL)) { V = Simplified; continue; } @@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, unsigned AS = GEPOp->getPointerAddressSpace(); // Walk the indices of the GEP, accumulating them into BaseOffs/VarIndices.
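As an aside, here is a minimal standalone sketch (not part of the patch; the array shape and names are invented) of what this accumulation produces for a concrete GEP: constant indices fold into BaseOffs, while each symbolic index becomes a VariableGEPIndex whose Scale is the byte size of the type it strides over.

#include <cstdint>
#include <cstdio>

int main() {
  // Model of: getelementptr [10 x [4 x i32]], ptr %A, i64 0, i64 1, i64 %i
  // The constant index 1 strides over [4 x i32] (16 bytes) and folds into
  // the constant base offset; the symbolic %i strides over i32, so it
  // becomes one variable index with Scale = 4.
  int64_t BaseOffs = 1 * 16;
  int64_t Scale = 4;
  for (int64_t i = 0; i < 3; ++i)
    std::printf("i=%lld -> byte offset %lld\n", (long long)i,
                (long long)(BaseOffs + Scale * i));
  return 0;
}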
gep_type_iterator GTI = gep_type_begin(GEPOp); - for (User::const_op_iterator I = GEPOp->op_begin()+1, - E = GEPOp->op_end(); I != E; ++I) { - Value *Index = *I; + for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); + I != E; ++I) { + const Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - if (FieldNo == 0) continue; + if (FieldNo == 0) + continue; BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo); continue; } // For an array/pointer, add the element offset, explicitly scaled. - if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { - if (CIdx->isZero()) continue; + if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { + if (CIdx->isZero()) + continue; BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue(); continue; } uint64_t Scale = DL.getTypeAllocSize(*GTI); - ExtensionKind Extension = EK_NotExtended; + unsigned ZExtBits = 0, SExtBits = 0; // If the integer type is smaller than the pointer size, it is implicitly // sign extended to pointer size. unsigned Width = Index->getType()->getIntegerBitWidth(); - if (DL.getPointerSizeInBits(AS) > Width) - Extension = EK_SignExt; + unsigned PointerSize = DL.getPointerSizeInBits(AS); + if (PointerSize > Width) + SExtBits += PointerSize - Width; // Use GetLinearExpression to decompose the index into a C1*V+C2 form. APInt IndexScale(Width, 0), IndexOffset(Width, 0); - Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL, - 0, AC, DT); + bool NSW = true, NUW = true; + Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, + SExtBits, DL, 0, AC, DT, NSW, NUW); // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. - BaseOffs += IndexOffset.getSExtValue()*Scale; + BaseOffs += IndexOffset.getSExtValue() * Scale; Scale *= IndexScale.getSExtValue(); // If we already had an occurrence of this index variable, merge this @@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // A[x][x] -> x*16 + x*4 -> x*20 // This also ensures that 'x' only appears in the index list once. for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { - if (VarIndices[i].V == Index && - VarIndices[i].Extension == Extension) { + if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits && + VarIndices[i].SExtBits == SExtBits) { Scale += VarIndices[i].Scale; - VarIndices.erase(VarIndices.begin()+i); + VarIndices.erase(VarIndices.begin() + i); break; } } // Make sure that we have a scale that makes sense for this target's // pointer size. - if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) { + if (unsigned ShiftBits = 64 - PointerSize) { Scale <<= ShiftBits; Scale = (int64_t)Scale >> ShiftBits; } if (Scale) { - VariableGEPIndex Entry = {Index, Extension, + VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, static_cast<int64_t>(Scale)}; VarIndices.push_back(Entry); } @@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If the chain of expressions is too deep, just return early. 
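The ShiftBits manipulation a few lines above narrows a 64-bit scale to the target's pointer width by shifting the value up and arithmetic-shifting it back down. A hedged standalone demo of that narrowing, with hypothetical values and names (not LLVM code):

#include <cstdint>
#include <cstdio>

int64_t narrowScale(uint64_t Scale, unsigned PointerSize) {
  unsigned ShiftBits = 64 - PointerSize;
  if (ShiftBits == 0)
    return (int64_t)Scale;
  Scale <<= ShiftBits;                 // drop bits above the pointer width
  return (int64_t)Scale >> ShiftBits;  // sign-extend from the pointer width
}

int main() {
  // On a 32-bit target, 0x100000001 behaves like 1 in pointer arithmetic...
  std::printf("%lld\n", (long long)narrowScale(0x100000001ULL, 32)); // 1
  // ...and 0xFFFFFFFF behaves like -1.
  std::printf("%lld\n", (long long)narrowScale(0xFFFFFFFFULL, 32));  // -1
}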
MaxLookupReached = true; + SearchLimitReached++; return V; } -//===----------------------------------------------------------------------===// -// BasicAliasAnalysis Pass -//===----------------------------------------------------------------------===// - -#ifndef NDEBUG -static const Function *getParent(const Value *V) { - if (const Instruction *inst = dyn_cast<Instruction>(V)) - return inst->getParent()->getParent(); - - if (const Argument *arg = dyn_cast<Argument>(V)) - return arg->getParent(); - - return nullptr; -} - -static bool notDifferentParent(const Value *O1, const Value *O2) { - - const Function *F1 = getParent(O1); - const Function *F2 = getParent(O2); - - return !F1 || !F2 || F1 == F2; -} -#endif - -namespace { - /// BasicAliasAnalysis - This is the primary alias analysis implementation. - struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { - static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : ImmutablePass(ID) { - initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - bool doInitialization(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AliasAnalysis>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - } - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - assert(AliasCache.empty() && "AliasCache must be cleared after use!"); - assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && - "BasicAliasAnalysis doesn't support interprocedural queries."); - AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, - LocB.Ptr, LocB.Size, LocB.AATags); - // AliasCache rarely has more than 1 or 2 elements, always use - // shrink_and_clear so it quickly returns to the inline capacity of the - // SmallDenseMap if it ever grows larger. - // FIXME: This should really be shrink_to_inline_capacity_and_clear(). - AliasCache.shrink_and_clear(); - VisitedPhiBBs.clear(); - return Alias; - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; - - /// pointsToConstantMemory - Chase pointers until we find a (constant - /// global) or not. - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override; - - /// Get the location associated with a pointer argument of a callsite. - ModRefResult getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) override; - - /// getModRefBehavior - Return the behavior when calling the given - /// call site. - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - - /// getModRefBehavior - Return the behavior when calling the given function. - /// For use when the call site is not known. - ModRefBehavior getModRefBehavior(const Function *F) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - // AliasCache - Track alias queries to guard against recursion. 
- typedef std::pair<MemoryLocation, MemoryLocation> LocPair; - typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; - AliasCacheTy AliasCache; - - /// \brief Track phi nodes we have visited. When interpret "Value" pointer - /// equality as value equality we need to make sure that the "Value" is not - /// part of a cycle. Otherwise, two uses could come from different - /// "iterations" of a cycle and see different values for the same "Value" - /// pointer. - /// The following example shows the problem: - /// %p = phi(%alloca1, %addr2) - /// %l = load %ptr - /// %addr1 = gep, %alloca2, 0, %l - /// %addr2 = gep %alloca2, 0, (%l + 1) - /// alias(%p, %addr1) -> MayAlias ! - /// store %l, ... - SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs; - - // Visited - Track instructions visited by pointsToConstantMemory. - SmallPtrSet<const Value*, 16> Visited; - - /// \brief Check whether two Values can be considered equivalent. - /// - /// In addition to pointer equivalence of \p V1 and \p V2 this checks - /// whether they can not be part of a cycle in the value graph by looking at - /// all visited phi nodes an making sure that the phis cannot reach the - /// value. We have to do this because we are looking through phi nodes (That - /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB). - bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); - - /// \brief Dest and Src are the variable indices from two decomposed - /// GetElementPtr instructions GEP1 and GEP2 which have common base - /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic - /// difference between the two pointers. - void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, - const SmallVectorImpl<VariableGEPIndex> &Src); - - // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP - // instruction against another. - AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, - const AAMDNodes &V1AAInfo, - const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, const Value *UnderlyingV2); - - // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI - // instruction against another. - AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, - const AAMDNodes &PNAAInfo, - const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo); - - /// aliasSelect - Disambiguate a Select instruction against another value. - AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, - const AAMDNodes &SIAAInfo, - const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo); - - AliasResult aliasCheck(const Value *V1, uint64_t V1Size, - AAMDNodes V1AATag, - const Value *V2, uint64_t V2Size, - AAMDNodes V2AATag); - }; -} // End of anonymous namespace - -// Register this pass... -char BasicAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa", - "Basic Alias Analysis (stateless AA impl)", - false, true, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa", - "Basic Alias Analysis (stateless AA impl)", - false, true, false) - - -ImmutablePass *llvm::createBasicAliasAnalysisPass() { - return new BasicAliasAnalysis(); -} - -/// pointsToConstantMemory - Returns whether the given pointer value -/// points to memory that is local to the function, with global constants being -/// considered local to all functions. 
-bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +/// Returns whether the given pointer value points to memory that is local to +/// the function, with global constants being considered local to all +/// functions. +bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { assert(Visited.empty() && "Visited must be cleared after use!"); unsigned MaxLookup = 8; SmallVector<const Value *, 16> Worklist; Worklist.push_back(Loc.Ptr); do { - const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL); + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); if (!Visited.insert(V).second) { Visited.clear(); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } // An alloca instruction defines local memory. @@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, // others. GV may even be a declaration, not a definition. if (!GV->isConstant()) { Visited.clear(); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } continue; } @@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, // Don't bother inspecting phi nodes with many operands. if (PN->getNumIncomingValues() > MaxLookup) { Visited.clear(); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } for (Value *IncValue : PN->incoming_values()) Worklist.push_back(IncValue); @@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, // Otherwise be conservative. Visited.clear(); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } while (!Worklist.empty() && --MaxLookup); @@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS, return false; } -/// getModRefBehavior - Return the behavior when calling the given call site. -AliasAnalysis::ModRefBehavior -BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +/// Returns the behavior when calling the given call site. +FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) { if (CS.doesNotAccessMemory()) // Can't do better than this. - return DoesNotAccessMemory; + return FMRB_DoesNotAccessMemory; - ModRefBehavior Min = UnknownModRefBehavior; + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; // If the callsite knows it only reads memory, don't return worse // than that. if (CS.onlyReadsMemory()) - Min = OnlyReadsMemory; + Min = FMRB_OnlyReadsMemory; if (CS.onlyAccessesArgMemory()) - Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); - // The AliasAnalysis base class has some smarts, lets use them. - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + // The AAResultBase base class has some smarts, lets use them. + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); } -/// getModRefBehavior - Return the behavior when calling the given function. -/// For use when the call site is not known. -AliasAnalysis::ModRefBehavior -BasicAliasAnalysis::getModRefBehavior(const Function *F) { +/// Returns the behavior when calling the given function. For use when the call +/// site is not known. 
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { // If the function declares it doesn't access memory, we can't do better. if (F->doesNotAccessMemory()) - return DoesNotAccessMemory; - - // For intrinsics, we can check the table. - if (Intrinsic::ID iid = F->getIntrinsicID()) { -#define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/IR/Intrinsics.gen" -#undef GET_INTRINSIC_MODREF_BEHAVIOR - } + return FMRB_DoesNotAccessMemory; - ModRefBehavior Min = UnknownModRefBehavior; + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; // If the function declares it only reads memory, go with that. if (F->onlyReadsMemory()) - Min = OnlyReadsMemory; + Min = FMRB_OnlyReadsMemory; if (F->onlyAccessesArgMemory()) - Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); - const TargetLibraryInfo &TLI = - getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (isMemsetPattern16(F, TLI)) - Min = OnlyAccessesArgumentPointees; + Min = FMRB_OnlyAccessesArgumentPointees; // Otherwise be conservative. - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); } -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { +ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, + unsigned ArgIdx) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) switch (II->getIntrinsicID()) { default: @@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { case Intrinsic::memmove: assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for memory intrinsic"); - return ArgIdx ? Ref : Mod; + return ArgIdx ? MRI_Ref : MRI_Mod; } // We can bound the aliasing properties of memset_pattern16 just as we can @@ -733,40 +609,82 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 // whenever possible. if (CS.getCalledFunction() && - isMemsetPattern16(CS.getCalledFunction(), *TLI)) { + isMemsetPattern16(CS.getCalledFunction(), TLI)) { assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for memset_pattern16"); - return ArgIdx ? Ref : Mod; + return ArgIdx ? MRI_Ref : MRI_Mod; } // FIXME: Handle memset_pattern4 and memset_pattern8 also. 
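To make the memset_pattern16 rule concrete: the destination argument (index 0) is only written and the pattern argument (index 1) is only read, which is why the code above answers Mod for argument 0 and Ref for argument 1. A simplified standalone sketch; the enum and helper are hypothetical stand-ins for the real MRI_ values:

#include <cassert>
#include <cstdio>

// Two-bit encoding in the spirit of the MRI_ values used above.
enum ModRefInfo { MRI_NoModRef = 0, MRI_Ref = 1, MRI_Mod = 2 };

ModRefInfo memsetPattern16ArgModRef(unsigned ArgIdx) {
  assert(ArgIdx < 2 && "memset_pattern16 has two pointer arguments");
  return ArgIdx ? MRI_Ref : MRI_Mod; // arg 0 is written, arg 1 is read
}

int main() {
  std::printf("%d %d\n", memsetPattern16ArgModRef(0),
              memsetPattern16ArgModRef(1)); // 2 1
}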
- return AliasAnalysis::getArgModRefInfo(CS, ArgIdx); + if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) + return MRI_Ref; + + if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone)) + return MRI_NoModRef; + + return AAResultBase::getArgModRefInfo(CS, ArgIdx); } static bool isAssumeIntrinsic(ImmutableCallSite CS) { const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); - if (II && II->getIntrinsicID() == Intrinsic::assume) - return true; + return II && II->getIntrinsicID() == Intrinsic::assume; +} - return false; +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast<Instruction>(V)) + return inst->getParent()->getParent(); + + if (const Argument *arg = dyn_cast<Argument>(V)) + return arg->getParent(); + + return nullptr; } -bool BasicAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + +AliasResult BasicAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && + "BasicAliasAnalysis doesn't support interprocedural queries."); + + // If we have a directly cached entry for these locations, we have recursed + // through this once, so just return the cached results. Notably, when this + // happens, we don't clear the cache. + auto CacheIt = AliasCache.find(LocPair(LocA, LocB)); + if (CacheIt != AliasCache.end()) + return CacheIt->second; + + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, + LocB.Size, LocB.AATags); + // AliasCache rarely has more than 1 or 2 elements, always use + // shrink_and_clear so it quickly returns to the inline capacity of the + // SmallDenseMap if it ever grows larger. + // FIXME: This should really be shrink_to_inline_capacity_and_clear(). + AliasCache.shrink_and_clear(); + VisitedPhiBBs.clear(); + return Alias; } -/// getModRefInfo - Check to see if the specified callsite can clobber the -/// specified memory object. Since we only look at local properties of this -/// function, we really can't say much about this query. We do, however, use -/// simple "address taken" analysis on local objects. -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +/// Checks to see if the specified callsite can clobber the specified memory +/// object. +/// +/// Since we only look at local properties of this function, we really can't +/// say much about this query. We do, however, use simple "address taken" +/// analysis on local objects. +ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) && "AliasAnalysis query involving multiple functions!"); - const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL); + const Value *Object = GetUnderlyingObject(Loc.Ptr, DL); // If this is a tail call and Loc.Ptr points to a stack location, we know that // the tail call cannot access or modify the local stack. 
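A hedged illustration of this "address taken" reasoning in ordinary C++ rather than LLVM IR (all names invented): a call can only mod/ref a local object if some path hands it the object's address.

#include <cstdio>

void opaqueCall(int *p) { *p += 1; } // stands in for an arbitrary callee

int main() {
  int local = 0;   // address never stored, returned, or passed to a call
  int passed = 0;
  opaqueCall(&passed);             // may Mod/Ref only what it can reach
  std::printf("%d %d\n", local, passed); // 0 1: 'local' provably untouched
}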
@@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (isa<AllocaInst>(Object)) if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) if (CI->isTailCall()) - return NoModRef; + return MRI_NoModRef; // If the pointer is to a locally allocated object that does not escape, // then the call can not mod/ref the pointer unless the call takes the pointer @@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) { + AliasResult AR = + getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object)); + if (AR) { PassedAsArg = true; break; } } if (!PassedAsArg) - return NoModRef; + return MRI_NoModRef; } // While the assume intrinsic is marked as arbitrarily writing so that // proper control dependencies will be maintained, it never aliases any // particular memory location. if (isAssumeIntrinsic(CS)) - return NoModRef; + return MRI_NoModRef; - // The AliasAnalysis base class has some smarts, lets use them. - return AliasAnalysis::getModRefInfo(CS, Loc); + // The AAResultBase base class has some smarts, let's use them. + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { +ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { // While the assume intrinsic is marked as arbitrarily writing so that // proper control dependencies will be maintained, it never aliases any // particular memory location. if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) - return NoModRef; + return MRI_NoModRef; - // The AliasAnalysis base class has some smarts, lets use them. - return AliasAnalysis::getModRefInfo(CS1, CS2); + // The AAResultBase base class has some smarts, let's use them. + return AAResultBase::getModRefInfo(CS1, CS2); } -/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP -/// operators, both having the exact same pointer operand. +/// Provide ad-hoc rules to disambiguate accesses through two GEP operators, +/// both having the exact same pointer operand. static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size, const GEPOperator *GEP2, @@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, ConstantInt *C2 = dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1)); - // If the last (struct) indices aren't constants, we can't say anything. - // If they're identical, the other indices might be also be dynamically - // equal, so the GEPs can alias. - if (!C1 || !C2 || C1 == C2) + // If the last (struct) indices are constants and are equal, the other + // indices might also be dynamically equal, so the GEPs can alias.
+ if (C1 && C2 && C1 == C2) return MayAlias; // Find the last-indexed type of the GEP, i.e., the type you'd get if @@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, IntermediateIndices.push_back(GEP1->getOperand(i + 1)); } - StructType *LastIndexedStruct = - dyn_cast<StructType>(GetElementPtrInst::getIndexedType( - GEP1->getSourceElementType(), IntermediateIndices)); + auto *Ty = GetElementPtrInst::getIndexedType( + GEP1->getSourceElementType(), IntermediateIndices); + StructType *LastIndexedStruct = dyn_cast<StructType>(Ty); + + if (isa<SequentialType>(Ty)) { + // We know that: + // - both GEPs begin indexing from the exact same pointer; + // - the last indices in both GEPs are constants, indexing into a sequential + // type (array or pointer); + // - both GEPs only index through arrays prior to that. + // + // Because array indices greater than the number of elements are valid in + // GEPs, unless we know the intermediate indices are identical between + // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't + // partially overlap. We also need to check that the loaded size matches + // the element size, otherwise we could still have overlap. + const uint64_t ElementSize = + DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType()); + if (V1Size != ElementSize || V2Size != ElementSize) + return MayAlias; + + for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i) + if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1)) + return MayAlias; - if (!LastIndexedStruct) + // Now we know that the array/pointer that GEP1 indexes into and that + // that GEP2 indexes into must either precisely overlap or be disjoint. + // Because they cannot partially overlap and because fields in an array + // cannot overlap, if we can prove the final indices are different between + // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias. + + // If the last indices are constants, we've already checked they don't + // equal each other so we can exit early. + if (C1 && C2) + return NoAlias; + if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1), + GEP2->getOperand(GEP2->getNumOperands() - 1), + DL)) + return NoAlias; + return MayAlias; + } else if (!LastIndexedStruct || !C1 || !C2) { return MayAlias; + } // We know that: // - both GEPs begin indexing from the exact same pointer; @@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, return MayAlias; } -/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction -/// against another pointer. We know that V1 is a GEP, but we don't know -/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL), -/// UnderlyingV2 is the same for V2. +/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against +/// another pointer. /// -AliasResult BasicAliasAnalysis::aliasGEP( - const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo, - const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, const Value *UnderlyingV2) { +/// We know that V1 is a GEP, but we don't know anything about V2. +/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for +/// V2. 
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const AAMDNodes &V1AAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, + const Value *UnderlyingV2) { int64_t GEP1BaseOffset; bool GEP1MaxLookupReached; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; - // We have to get two AssumptionCaches here because GEP1 and V2 may be from - // different functions. - // FIXME: This really doesn't make any sense. We get a dominator tree below - // that can only refer to a single function. But this function (aliasGEP) is - // a method on an immutable pass that can be called when there *isn't* - // a single function. The old pass management layer makes this "work", but - // this isn't really a clean solution. - AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>(); - AssumptionCache *AC1 = nullptr, *AC2 = nullptr; - if (auto *GEP1I = dyn_cast<Instruction>(GEP1)) - AC1 = &ACT.getAssumptionCache( - const_cast<Function &>(*GEP1I->getParent()->getParent())); - if (auto *I2 = dyn_cast<Instruction>(V2)) - AC2 = &ACT.getAssumptionCache( - const_cast<Function &>(*I2->getParent()->getParent())); - - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - // If we have two gep instructions with must-alias or not-alias'ing base // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. @@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP( // identical. if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. - AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, - V1AAInfo, UnderlyingV2, - V2Size, V2AAInfo); + AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo, + UnderlyingV2, V2Size, V2AAInfo); if (PreciseBaseAlias == NoAlias) { // See if the computed offset from the common pointer tells us about the // relation of the resulting pointer. @@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP( SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, - GEP2MaxLookupReached, *DL, AC2, DT); + GEP2MaxLookupReached, DL, &AC, DT); const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, *DL, AC1, DT); + GEP1MaxLookupReached, DL, &AC, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. + // FIXME: They always have a DataLayout so this should become an + // assert. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(!DL && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } // If the max search depth is reached the result is undefined @@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP( // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. - if (BaseAlias != MustAlias) return BaseAlias; + if (BaseAlias != MustAlias) + return BaseAlias; // Otherwise, we have a MustAlias. Since the base pointers alias each other // exactly, see if the computed offset from the common pointer tells us // about the relation of the resulting pointer. 
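Concretely, once both GEPs share a MustAlias base, disambiguation reduces to interval arithmetic over the decomposed offsets. A minimal sketch under that assumption, using a hypothetical helper and plain integers instead of APInt:

#include <cstdint>
#include <cstdio>
#include <utility>

// Accesses [Off1, Off1 + Size1) and [Off2, Off2 + Size2) from the same base
// can only overlap if the gap between the offsets is smaller than the size
// of the earlier access.
bool mayOverlap(int64_t Off1, uint64_t Size1, int64_t Off2, uint64_t Size2) {
  if (Off1 > Off2) {
    std::swap(Off1, Off2);
    std::swap(Size1, Size2);
  }
  return (uint64_t)(Off2 - Off1) < Size1;
}

int main() {
  // &A[1] vs. &A[3] with 4-byte accesses: offsets 4 and 12, gap 8 >= 4.
  std::printf("%s\n", mayOverlap(4, 4, 12, 4) ? "MayAlias" : "NoAlias");
}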
const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, *DL, AC1, DT); + GEP1MaxLookupReached, DL, &AC, DT); int64_t GEP2BaseOffset; bool GEP2MaxLookupReached; SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, - GEP2MaxLookupReached, *DL, AC2, DT); + GEP2MaxLookupReached, DL, &AC, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. + // FIXME: They always have a DataLayout so this should become an assert. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(!DL && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } // If we know the two GEPs are based off of the exact same pointer (and not // just the same underlying object), see if that tells us anything about // the resulting pointers. - if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) { - AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL); + if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) { + AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. if (R != MayAlias) return R; @@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP( const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, *DL, AC1, DT); + GEP1MaxLookupReached, DL, &AC, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. + // FIXME: They always have a DataLayout so this should become an assert. if (GEP1BasePtr != UnderlyingV1) { - assert(!DL && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } // If the max search depth is reached the result is undefined @@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP( } } - // Try to distinguish something like &A[i][1] against &A[42][0]. - // Grab the least significant bit set in any of the scales. if (!GEP1VariableIndices.empty()) { uint64_t Modulo = 0; - for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) - Modulo |= (uint64_t) GEP1VariableIndices[i].Scale; + bool AllPositive = true; + for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) { + + // Try to distinguish something like &A[i][1] against &A[42][0]. + // Grab the least significant bit set in any of the scales. We + // don't need std::abs here (even if the scale's negative) as we'll + // be ^'ing Modulo with itself later. + Modulo |= (uint64_t)GEP1VariableIndices[i].Scale; + + if (AllPositive) { + // If the Value could change between cycles, then any reasoning about + // the Value this cycle may not hold in the next cycle. We'll just + // give up if we can't determine conditions that hold for every cycle: + const Value *V = GEP1VariableIndices[i].V; + + bool SignKnownZero, SignKnownOne; + ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL, + 0, &AC, nullptr, DT); + + // Zero-extension widens the variable, and so forces the sign + // bit to zero. 
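The zero-extension fact relied on just above, checked in isolation; a standalone demo, not part of the patch:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Narrow = 0xFF;  // all bits set, including the narrow sign bit
  int64_t Widened = (int64_t)(uint64_t)Narrow; // models a zext to 64 bits
  std::printf("%lld\n", (long long)Widened);   // 255: provably non-negative
}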
+ bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V); + SignKnownZero |= IsZExt; + SignKnownOne &= !IsZExt; + + // If the variable begins with a zero then we know it's + // positive, regardless of whether the value is signed or + // unsigned. + int64_t Scale = GEP1VariableIndices[i].Scale; + AllPositive = + (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0); + } + } + Modulo = Modulo ^ (Modulo & (Modulo - 1)); // We can compute the difference between the two addresses @@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP( V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size && V1Size <= Modulo - ModOffset) return NoAlias; + + // If we know all the variables are positive, then GEP1 >= GEP1BasePtr. + // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers + // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr. + if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset) + return NoAlias; + + if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size, + GEP1BaseOffset, &AC, DT)) + return NoAlias; } // Statically, we can see that the base objects are the same, but the @@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { return MayAlias; } -/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select -/// instruction against another. -AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI, - uint64_t SISize, - const AAMDNodes &SIAAInfo, - const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo) { +/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction +/// against another. +AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, + const AAMDNodes &SIAAInfo, + const Value *V2, uint64_t V2Size, + const AAMDNodes &V2AAInfo) { // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) if (SI->getCondition() == SI2->getCondition()) { - AliasResult Alias = - aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, - SI2->getTrueValue(), V2Size, V2AAInfo); + AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, + SI2->getTrueValue(), V2Size, V2AAInfo); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, - SI2->getFalseValue(), V2Size, V2AAInfo); + aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, + SI2->getFalseValue(), V2Size, V2AAInfo); return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. AliasResult Alias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo); return MergeAliasResults(ThisAlias, Alias); } -// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction -// against another. 
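MergeAliasResults, whose signature appears in the surrounding context, combines the per-arm answers for selects and PHIs. Below is a plausible standalone reconstruction of that join based on the behavior described here; the PartialAlias/MustAlias case is an assumption, and the real function lives in this file:

#include <cstdio>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

// Agreeing arms keep their result; a mix of PartialAlias and MustAlias is
// assumed to degrade to PartialAlias; anything else degrades to MayAlias.
AliasResult mergeAliasResults(AliasResult A, AliasResult B) {
  if (A == B)
    return A;
  if ((A == PartialAlias && B == MustAlias) ||
      (B == PartialAlias && A == MustAlias))
    return PartialAlias;
  return MayAlias;
}

int main() {
  std::printf("%d\n", mergeAliasResults(NoAlias, NoAlias));   // 0: NoAlias
  std::printf("%d\n", mergeAliasResults(NoAlias, MustAlias)); // 1: MayAlias
}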
-AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, - const AAMDNodes &PNAAInfo, - const Value *V2, uint64_t V2Size, - const AAMDNodes &V2AAInfo) { +/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against +/// another. +AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, + const AAMDNodes &PNAAInfo, const Value *V2, + uint64_t V2Size, + const AAMDNodes &V2AAInfo) { // Track phi nodes we have visited. We use this information when we determine // value equivalence. VisitedPhiBBs.insert(PN->getParent()); @@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = - aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, - PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), - V2Size, V2AAInfo); + aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), + V2Size, V2AAInfo); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, return Alias; } - SmallPtrSet<Value*, 4> UniqueSrc; - SmallVector<Value*, 4> V1Srcs; + SmallPtrSet<Value *, 4> UniqueSrc; + SmallVector<Value *, 4> V1Srcs; + bool isRecursive = false; for (Value *PV1 : PN->incoming_values()) { if (isa<PHINode>(PV1)) // If any of the source itself is a PHI, return MayAlias conservatively @@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, // sides are PHI nodes. In which case, this is O(m x n) time where 'm' // and 'n' are the number of PHI sources. return MayAlias; + + if (EnableRecPhiAnalysis) + if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below. + if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && + isa<ConstantInt>(PV1GEP->idx_begin())) { + isRecursive = true; + continue; + } + } + if (UniqueSrc.insert(PV1).second) V1Srcs.push_back(PV1); } - AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, - V1Srcs[0], PNSize, PNAAInfo); + // If this PHI node is recursive, set the size of the accessed memory to + // unknown to represent all the possible values the GEP could advance the + // pointer to. + if (isRecursive) + PNSize = MemoryLocation::UnknownSize; + + AliasResult Alias = + aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo); + // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. if (Alias == MayAlias) @@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, - V, PNSize, PNAAInfo); + AliasResult ThisAlias = + aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, return Alias; } -// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, -// such as array references. 
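The -basicaa-recphi handling above recognizes a PHI whose looping input is a GEP that steps the PHI itself by a constant. In ordinary C++ the pattern looks like the invented loop below; widening the PHI's access size to unknown conservatively covers every element the pointer can reach.

#include <cstdio>

int main() {
  int A[4] = {1, 2, 3, 4};
  int Sum = 0;
  // 'P' plays the role of the PHI: one incoming value is &A[0] and the other
  // is "P + 1", a GEP that advances the PHI itself by a constant stride.
  for (int *P = &A[0]; P != &A[4]; P = P + 1)
    Sum += *P; // over the whole loop, *P may touch any element of A
  std::printf("%d\n", Sum); // 10
}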
-// -AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, - AAMDNodes V1AAInfo, const Value *V2, - uint64_t V2Size, - AAMDNodes V2AAInfo) { +/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as +/// array references. +AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, + AAMDNodes V1AAInfo, const Value *V2, + uint64_t V2Size, AAMDNodes V2AAInfo) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size == 0 || V2Size == 0) @@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, return MustAlias; if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) - return NoAlias; // Scalars cannot alias each other + return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. - const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth); - const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth); + const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth); + const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth); // Null values in the default address space don't point to any object, so they // don't alias any other pointer. @@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. - if (DL) - if ((V1Size != MemoryLocation::UnknownSize && - isObjectSmallerThan(O2, V1Size, *DL, *TLI)) || - (V2Size != MemoryLocation::UnknownSize && - isObjectSmallerThan(O1, V2Size, *DL, *TLI))) - return NoAlias; + if ((V1Size != MemoryLocation::UnknownSize && + isObjectSmallerThan(O2, V1Size, DL, TLI)) || + (V2Size != MemoryLocation::UnknownSize && + isObjectSmallerThan(O1, V2Size, DL, TLI))) + return NoAlias; // Check the cache before climbing up use-def chains. This also terminates // otherwise infinitely recursive queries. 
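A hedged standalone model of that cache protocol, with simplified types and an invented analysis: seeding the entry with MayAlias before recursing means a query that reaches itself terminates with the conservative answer instead of looping.

#include <cstdio>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias };
static std::map<std::pair<int, int>, AliasResult> AliasCache;

AliasResult aliasQuery(int A, int B) {
  // Insert the conservative answer first; if the pair is already present we
  // are in a recursive (or repeated) query and return the cached value.
  auto Ins = AliasCache.insert({{A, B}, MayAlias});
  if (!Ins.second)
    return Ins.first->second;
  AliasResult R = (A != B) ? NoAlias : MayAlias; // stand-in for the analysis
  return AliasCache[{A, B}] = R;
}

int main() { std::printf("%d\n", aliasQuery(1, 2)); } // 0: NoAlias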
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (V1 > V2) std::swap(Locs.first, Locs.second); std::pair<AliasCacheTy::iterator, bool> Pair = - AliasCache.insert(std::make_pair(Locs, MayAlias)); + AliasCache.insert(std::make_pair(Locs, MayAlias)); if (!Pair.second) return Pair.first->second; @@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(V1AAInfo, V2AAInfo); } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { - AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); - if (Result != MayAlias) return AliasCache[Locs] = Result; + AliasResult Result = + aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); + if (Result != MayAlias) + return AliasCache[Locs] = Result; } if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { @@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(V1AAInfo, V2AAInfo); } if (const PHINode *PN = dyn_cast<PHINode>(V1)) { - AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, - V2, V2Size, V2AAInfo); - if (Result != MayAlias) return AliasCache[Locs] = Result; + AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); + if (Result != MayAlias) + return AliasCache[Locs] = Result; } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { @@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(V1AAInfo, V2AAInfo); } if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { - AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo, - V2, V2Size, V2AAInfo); - if (Result != MayAlias) return AliasCache[Locs] = Result; + AliasResult Result = + aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); + if (Result != MayAlias) + return AliasCache[Locs] = Result; } // If both pointers are pointing into the same object and one of the // accesses is accessing the entire object, then the accesses must // overlap in some way. - if (DL && O1 == O2) + if (O1 == O2) if ((V1Size != MemoryLocation::UnknownSize && - isObjectSize(O1, V1Size, *DL, *TLI)) || + isObjectSize(O1, V1Size, DL, TLI)) || (V2Size != MemoryLocation::UnknownSize && - isObjectSize(O2, V2Size, *DL, *TLI))) + isObjectSize(O2, V2Size, DL, TLI))) return AliasCache[Locs] = PartialAlias; - AliasResult Result = - AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo), - MemoryLocation(V2, V2Size, V2AAInfo)); + // Recurse back into the best AA results we have, potentially with refined + // memory locations. We have already ensured that BasicAA has a MayAlias + // cache result for these, so any recursion back into BasicAA won't loop. + AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second); return AliasCache[Locs] = Result; } -bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, - const Value *V2) { +/// Check whether two Values can be considered equivalent. +/// +/// In addition to pointer equivalence of \p V1 and \p V2 this checks whether +/// they cannot be part of a cycle in the value graph by looking at all +/// visited phi nodes and making sure that the phis cannot reach the value. We +/// have to do this because we are looking through phi nodes (That is, we say +/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).)
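An everyday C++ analogue of the hazard guarded against here (invented example): the same expression evaluated in different loop iterations denotes different run-time values, so syntactic identity alone is not value identity once a cycle is involved.

#include <cstdio>

int main() {
  int A[4] = {0, 1, 2, 3};
  int *P = &A[0];
  for (int I = 0; I < 2; ++I) {
    int *Q = P;  // "Q == P" holds syntactically, but only within one
    P = P + 1;   // iteration; the next iteration's P differs from this Q
    std::printf("%d %d\n", *Q, *P);
  }
}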
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, + const Value *V2) { if (V != V2) return false; @@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck) return false; - // Use dominance or loop info if available. - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - // Make sure that the visited phis cannot reach the Value. This ensures that // the Values cannot come from different iterations of a potential cycle the // phi nodes could be involved in. for (auto *P : VisitedPhiBBs) - if (isPotentiallyReachable(P->begin(), Inst, DT, LI)) + if (isPotentiallyReachable(&P->front(), Inst, DT, LI)) return false; return true; } -/// GetIndexDifference - Dest and Src are the variable indices from two -/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base -/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic -/// difference between the two pointers. -void BasicAliasAnalysis::GetIndexDifference( +/// Computes the symbolic difference between two de-composed GEPs. +/// +/// Dest and Src are the variable indices from two decomposed GetElementPtr +/// instructions GEP1 and GEP2 which have common base pointers. +void BasicAAResult::GetIndexDifference( SmallVectorImpl<VariableGEPIndex> &Dest, const SmallVectorImpl<VariableGEPIndex> &Src) { if (Src.empty()) @@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference( for (unsigned i = 0, e = Src.size(); i != e; ++i) { const Value *V = Src[i].V; - ExtensionKind Extension = Src[i].Extension; + unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits; int64_t Scale = Src[i].Scale; // Find V in Dest. This is N^2, but pointer indices almost never have more // than a few variable indexes. for (unsigned j = 0, e = Dest.size(); j != e; ++j) { if (!isValueEqualInPotentialCycles(Dest[j].V, V) || - Dest[j].Extension != Extension) + Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits) continue; // If we found it, subtract off Scale V's from the entry in Dest. If it @@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference( // If we didn't consume this entry, add it to the end of the Dest list. if (Scale) { - VariableGEPIndex Entry = { V, Extension, -Scale }; + VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale}; Dest.push_back(Entry); } } } + +bool BasicAAResult::constantOffsetHeuristic( + const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size, + uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC, + DominatorTree *DT) { + if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize || + V2Size == MemoryLocation::UnknownSize) + return false; + + const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1]; + + if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits || + Var0.Scale != -Var1.Scale) + return false; + + unsigned Width = Var1.V->getType()->getIntegerBitWidth(); + + // We'll strip off the Extensions of Var0 and Var1 and do another round + // of GetLinearExpression decomposition. In the example above, if Var0 + // is zext(%x + 1) we should get V1 == %x and V1Offset == 1. 
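The minimum-distance computation that follows relies on modular arithmetic; a standalone 8-bit demo of the same idea, with hypothetical values and plain integers instead of APInt:

#include <cstdint>
#include <cstdio>

int main() {
  // The distance between x and x + C modulo 256 is min(C, 256 - C): for
  // C = 250, wrapping brings the two values within 6 of each other.
  uint8_t Diff = 250;               // models V0Offset - V1Offset
  uint8_t Wrapped = (uint8_t)-Diff; // 6
  uint8_t MinDiff = Diff < Wrapped ? Diff : Wrapped;
  std::printf("%u\n", (unsigned)MinDiff); // 6
}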
+ + APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0), + V1Offset(Width, 0); + bool NSW = true, NUW = true; + unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0; + const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits, + V0SExtBits, DL, 0, AC, DT, NSW, NUW); + NSW = true, NUW = true; + const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits, + V1SExtBits, DL, 0, AC, DT, NSW, NUW); + + if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits || + V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1)) + return false; + + // We have a hit - Var0 and Var1 only differ by a constant offset! + + // If we've been sext'ed then zext'd the maximum difference between Var0 and + // Var1 is possible to calculate, but we're just interested in the absolute + // minimum difference between the two. The minimum distance may occur due to + // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so + // the minimum distance between %i and %i + 5 is 3. + APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff; + MinDiff = APIntOps::umin(MinDiff, Wrapped); + uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale); + + // We can't definitely say whether GEP1 is before or after V2 due to wrapping + // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other + // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and + // V2Size can fit in the MinDiffBytes gap. + return V1Size + std::abs(BaseOffset) <= MinDiffBytes && + V2Size + std::abs(BaseOffset) <= MinDiffBytes; +} + +//===----------------------------------------------------------------------===// +// BasicAliasAnalysis Pass +//===----------------------------------------------------------------------===// + +char BasicAA::PassID; + +BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) { + return BasicAAResult(F.getParent()->getDataLayout(), + AM->getResult<TargetLibraryAnalysis>(F), + AM->getResult<AssumptionAnalysis>(F), + AM->getCachedResult<DominatorTreeAnalysis>(F), + AM->getCachedResult<LoopAnalysis>(F)); +} + +BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char BasicAAWrapperPass::ID = 0; +void BasicAAWrapperPass::anchor() {} + +INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", + "Basic Alias Analysis (stateless AA impl)", true, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", + "Basic Alias Analysis (stateless AA impl)", true, true) + +FunctionPass *llvm::createBasicAAWrapperPass() { + return new BasicAAWrapperPass(); +} + +bool BasicAAWrapperPass::runOnFunction(Function &F) { + auto &ACT = getAnalysis<AssumptionCacheTracker>(); + auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), + ACT.getAssumptionCache(F), + DTWP ? &DTWP->getDomTree() : nullptr, + LIWP ? 
&LIWP->getLoopInfo() : nullptr)); + + return false; +} + +void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + +BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { + return BasicAAResult( + F.getParent()->getDataLayout(), + P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 3d819eb..90b7a33 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> { typedef Function::const_iterator nodes_iterator; static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return &G->getFunction()->front(); } static ChildIteratorType child_begin(const NodeType *N) { return succ_begin(N); @@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { } // end namespace llvm #endif -INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", - "Block Frequency Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", - "Block Frequency Analysis", true, true) - -char BlockFrequencyInfo::ID = 0; - +BlockFrequencyInfo::BlockFrequencyInfo() {} -BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { - initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); -} - -BlockFrequencyInfo::~BlockFrequencyInfo() {} - -void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BranchProbabilityInfo>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.setPreservesAll(); +BlockFrequencyInfo::BlockFrequencyInfo(const Function &F, + const BranchProbabilityInfo &BPI, + const LoopInfo &LI) { + calculate(F, BPI, LI); } -bool BlockFrequencyInfo::runOnFunction(Function &F) { - BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); - LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +void BlockFrequencyInfo::calculate(const Function &F, + const BranchProbabilityInfo &BPI, + const LoopInfo &LI) { if (!BFI) BFI.reset(new ImplType); - BFI->doFunction(&F, &BPI, &LI); + BFI->calculate(F, BPI, LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); #endif - return false; -} - -void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } - -void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { - if (BFI) BFI->print(O); } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI ? BFI->getBlockFreq(BB) : 0; } +void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, + uint64_t Freq) { + assert(BFI && "Expected analysis to be available"); + BFI->setBlockFreq(BB, Freq); +} + /// Pop up a ghostview window with the current block frequency propagation /// rendered using dot. void BlockFrequencyInfo::view() const { @@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, uint64_t BlockFrequencyInfo::getEntryFreq() const { return BFI ? 
BFI->getEntryFreq() : 0; } + +void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } + +void BlockFrequencyInfo::print(raw_ostream &OS) const { + if (BFI) + BFI->print(OS); +} + + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) + +char BlockFrequencyInfoWrapperPass::ID = 0; + + +BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass() + : FunctionPass(ID) { + initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {} + +void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BFI.print(OS); +} + +void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.setPreservesAll(); +} + +void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); } + +bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = + getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + BFI.calculate(F, BPI, LI); + return false; +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 6ceda06..48e23af 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { return Freqs[Node.Index].Scaled; } +void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, + uint64_t Freq) { + assert(Node.isValid() && "Expected valid node"); + assert(Node.Index < Freqs.size() && "Expected legal index"); + Freqs[Node.Index].Integer = Freq; +} + std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { return std::string(); @@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; DEBUG(dbgs() << " - Add back edge mass for node " << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); - Dist.addLocal(HeaderNode, BackedgeMass.getMass()); + if (BackedgeMass.getMass() > 0) + Dist.addLocal(HeaderNode, BackedgeMass.getMass()); + else + DEBUG(dbgs() << " Nothing added. 
Back edge mass is zero\n"); } DitheringDistributer D(Dist, LoopMass); diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 430b412..cf0cc8d 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -27,13 +27,13 @@ using namespace llvm; #define DEBUG_TYPE "branch-prob" -INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) -char BranchProbabilityInfo::ID = 0; +char BranchProbabilityInfoWrapperPass::ID = 0; // Weights are for internal use only. They are used by heuristics to help to // estimate edges' probability. Example: @@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -// Standard weight value. Used when none of the heuristics set weight for -// the edge. -static const uint32_t NORMAL_WEIGHT = 16; - -// Minimum weight of an edge. Please note, that weight is NEVER 0. -static const uint32_t MIN_WEIGHT = 1; - /// \brief Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an @@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) return false; - uint32_t UnreachableWeight = - std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, UnreachableWeight); + // If the terminator is an InvokeInst, check only the normal destination block + // as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(TI)) + if (PostDominatedByUnreachable.count(II->getNormalDest())) { + PostDominatedByUnreachable.insert(BB); + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). 
+ return false; + } - if (ReachableEdges.empty()) + if (ReachableEdges.empty()) { + BranchProbability Prob(1, UnreachableEdges.size()); + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t ReachableWeight = - std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), - NORMAL_WEIGHT); - for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ReachableWeight); + } + + BranchProbability UnreachableProb(UR_TAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + UnreachableEdges.size()); + BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + ReachableEdges.size()); + + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, UnreachableProb); + for (unsigned SuccIdx : ReachableEdges) + setEdgeProbability(BB, SuccIdx, ReachableProb); return true; } @@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - uint32_t W = Weights[i] / ScalingFactor; - WeightSum += W; - setEdgeWeight(BB, i, W); + Weights[i] /= ScalingFactor; + WeightSum += Weights[i]; } + + if (WeightSum == 0) { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {1, e}); + } else { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)}); + } + assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); @@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) return false; - uint32_t ColdWeight = - std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(), - E = ColdEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ColdWeight); - - if (NormalEdges.empty()) + if (NormalEdges.empty()) { + BranchProbability Prob(1, ColdEdges.size()); + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t NormalWeight = std::max( - CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); - for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(), - E = NormalEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, NormalWeight); + } + + BranchProbability ColdProb(CC_TAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + ColdEdges.size()); + BranchProbability NormalProb(CC_NONTAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + NormalEdges.size()); + + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, ColdProb); + for (unsigned SuccIdx : NormalEdges) + setEdgeProbability(BB, SuccIdx, NormalProb); return true; } @@ -312,15 +328,18 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(PH_TAKEN_WEIGHT, + PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. 
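All of these converted heuristics, including the loop one that follows, share one scheme: a fixed taken/not-taken weight pair is turned into per-edge fractions that provably sum to one. A small self-contained sketch of that scheme (plain C++; the weights and types are illustrative, not the LLVM ones):

#include <cstdint>
#include <cstdio>

struct Fraction { uint32_t Num, Den; };

// Each group receives GroupW parts out of TotalW, split evenly over its
// edges, so summing over all edges of all groups yields exactly 1.
Fraction perEdgeProb(uint32_t GroupW, uint32_t TotalW, uint32_t NumEdges) {
  return {GroupW, TotalW * NumEdges};
}

int main() {
  const uint32_t LikelyW = 1048575, UnlikelyW = 1; // illustrative weights
  // One "unreachable" successor, two "reachable" ones:
  Fraction U = perEdgeProb(UnlikelyW, LikelyW + UnlikelyW, 1);
  Fraction R = perEdgeProb(LikelyW, LikelyW + UnlikelyW, 2);
  // 1/1048576 + 2 * (1048575/2097152) == 1
  std::printf("unreachable %u/%u, reachable %u/%u each\n", U.Num, U.Den,
              R.Num, R.Den);
}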
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { - Loop *L = LI->getLoopFor(BB); +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB, + const LoopInfo &LI) { + Loop *L = LI.getLoopFor(BB); if (!L) return false; @@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (BackEdges.empty() && ExitingEdges.empty()) return false; - if (uint32_t numBackEdges = BackEdges.size()) { - uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; - if (backWeight < NORMAL_WEIGHT) - backWeight = NORMAL_WEIGHT; + // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and + // normalize them so that they sum up to one. + SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero()); + unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); + if (!BackEdges.empty()) + Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!InEdges.empty()) + Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!ExitingEdges.empty()) + Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom); - for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(), - EE = BackEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, backWeight); - } + if (uint32_t numBackEdges = BackEdges.size()) { + auto Prob = Probs[0] / numBackEdges; + for (unsigned SuccIdx : BackEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numInEdges = InEdges.size()) { - uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; - if (inWeight < NORMAL_WEIGHT) - inWeight = NORMAL_WEIGHT; - - for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(), - EE = InEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, inWeight); - } + auto Prob = Probs[1] / numInEdges; + for (unsigned SuccIdx : InEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numExitingEdges = ExitingEdges.size()) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; - if (exitWeight < MIN_WEIGHT) - exitWeight = MIN_WEIGHT; - - for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(), - EE = ExitingEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, exitWeight); - } + auto Prob = Probs[2] / numExitingEdges; + for (unsigned SuccIdx : ExitingEdges) + setEdgeProbability(BB, SuccIdx, Prob); } return true; @@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(ZH_TAKEN_WEIGHT, + ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(FPH_TAKEN_WEIGHT, + FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); + 
BranchProbability TakenProb(IH_TAKEN_WEIGHT, + IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb); + setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl()); return true; } -void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfoWrapperPass>(); - AU.setPreservesAll(); -} - -bool BranchProbabilityInfo::runOnFunction(Function &F) { - DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() - << " ----\n\n"); - LastF = &F; // Store the last function we ran on for printing. - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - assert(PostDominatedByUnreachable.empty()); - assert(PostDominatedByColdCall.empty()); - - // Walk the basic blocks in post-order so that we can build up state about - // the successors of a block iteratively. - for (auto BB : post_order(&F.getEntryBlock())) { - DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - if (calcUnreachableHeuristics(BB)) - continue; - if (calcMetadataWeights(BB)) - continue; - if (calcColdCallHeuristics(BB)) - continue; - if (calcLoopBranchHeuristics(BB)) - continue; - if (calcPointerHeuristics(BB)) - continue; - if (calcZeroHeuristics(BB)) - continue; - if (calcFloatingPointHeuristics(BB)) - continue; - calcInvokeHeuristics(BB); - } - - PostDominatedByUnreachable.clear(); - PostDominatedByColdCall.clear(); - return false; -} - void BranchProbabilityInfo::releaseMemory() { - Weights.clear(); + Probs.clear(); } -void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { +void BranchProbabilityInfo::print(raw_ostream &OS) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, // or the function it is currently running over. 
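One detail of the loop-branch conversion a few hunks up is worth a sketch: the denominator is built only from the edge groups that are actually present, and each group's share is then divided evenly among its members, so the result still sums to one. A compact version of that normalization (plain C++; weights and names are illustrative):

#include <cstdint>
#include <vector>

struct Fraction { uint32_t Num, Den; };

// Back edges and in-edges get TakenW parts each, exiting edges NonTakenW,
// but a group with no edges contributes nothing to the denominator.
std::vector<Fraction> loopEdgeProbs(uint32_t NumBack, uint32_t NumIn,
                                    uint32_t NumExiting) {
  const uint32_t TakenW = 124, NonTakenW = 4; // LBH-style, illustrative
  uint32_t Denom = (NumBack ? TakenW : 0) + (NumIn ? TakenW : 0) +
                   (NumExiting ? NonTakenW : 0);
  std::vector<Fraction> PerEdge;
  for (uint32_t i = 0; i < NumBack; ++i)
    PerEdge.push_back({TakenW, Denom * NumBack});
  for (uint32_t i = 0; i < NumIn; ++i)
    PerEdge.push_back({TakenW, Denom * NumIn});
  for (uint32_t i = 0; i < NumExiting; ++i)
    PerEdge.push_back({NonTakenW, Denom * NumExiting});
  return PerEdge; // one back edge + one exiting edge: 124/128 and 4/128
}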
assert(LastF && "Cannot print prior to running over a function"); - for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); - BI != BE; ++BI) { - for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); - SI != SE; ++SI) { - printEdgeProbability(OS << " ", BI, *SI); + for (const auto &BI : *LastF) { + for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; + ++SI) { + printEdgeProbability(OS << " ", &BI, *SI); } } } -uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { - uint32_t Sum = 0; - - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum >= PrevSum); (void) PrevSum; - } - - return Sum; -} - bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { - uint32_t Sum = 0; - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(BB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(BB, Succ); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = Succ; } } // Hot probability is at least 4/5 = 80% - if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) + if (MaxProb > BranchProbability(4, 5)) return MaxSucc; return nullptr; } -/// Get the raw edge weight for the edge. If can't find it, return -/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index -/// to the successors. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { - DenseMap<Edge, uint32_t>::const_iterator I = - Weights.find(std::make_pair(Src, IndexInSuccessors)); +/// Get the raw edge probability for the edge. If can't find it, return a +/// default probability 1/N where N is the number of successors. Here an edge is +/// specified using PredBlock and an +/// index to the successors. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors) const { + auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); - if (I != Weights.end()) + if (I != Probs.end()) return I->second; - return DEFAULT_WEIGHT; + return {1, + static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))}; } -uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, - succ_const_iterator Dst) const { - return getEdgeWeight(Src, Dst.getSuccessorIndex()); +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeProbability(Src, Dst.getSuccessorIndex()); } -/// Get the raw edge weight calculated for the block pair. This returns the sum -/// of all raw edge weights from Src to Dst. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { - uint32_t Weight = 0; - bool FoundWeight = false; - DenseMap<Edge, uint32_t>::const_iterator MapI; +/// Get the raw edge probability calculated for the block pair. 
This returns the +/// sum of all raw edge probabilities from Src to Dst. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + const BasicBlock *Dst) const { + auto Prob = BranchProbability::getZero(); + bool FoundProb = false; for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) if (*I == Dst) { - MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); - if (MapI != Weights.end()) { - FoundWeight = true; - Weight += MapI->second; + auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Probs.end()) { + FoundProb = true; + Prob += MapI->second; } } - return (!FoundWeight) ? DEFAULT_WEIGHT : Weight; + uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src)); + return FoundProb ? Prob : BranchProbability(1, succ_num); } -/// Set the edge weight for a given edge specified by PredBlock and an index -/// to the successors. -void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, - uint32_t Weight) { - Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getName() << " -> " - << IndexInSuccessors << " successor weight to " - << Weight << "\n"); -} - -/// Get an edge's probability, relative to other out-edges from Src. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { - uint32_t N = getEdgeWeight(Src, IndexInSuccessors); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); -} - -/// Get the probability of going from Src to Dst. It returns the sum of all -/// probabilities for edges from Src to Dst. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { - - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); +/// Set the edge probability for a given edge specified by PredBlock and an +/// index to the successors. +void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors, + BranchProbability Prob) { + Probs[std::make_pair(Src, IndexInSuccessors)] = Prob; + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors + << " successor probability to " << Prob << "\n"); } raw_ostream & @@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, return OS; } + +void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) { + DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); + LastF = &F; // Store the last function we ran on for printing. + assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. 
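Before the walk itself, note the lookup policy the accessors above establish: probabilities are stored sparsely, keyed by (block, successor index), and a missing entry now defaults to the uniform 1/N rather than a magic weight. A tiny sketch of that policy (plain C++; Edge, Prob, and edgeProb are illustrative stand-ins):

#include <cstdint>
#include <map>
#include <utility>

using BlockId = const void *;
using Edge = std::pair<BlockId, unsigned>; // (block, successor index)
struct Prob { uint32_t Num, Den; };

Prob edgeProb(const std::map<Edge, Prob> &Probs, BlockId BB,
              unsigned SuccIdx, uint32_t NumSuccessors) {
  auto It = Probs.find({BB, SuccIdx});
  if (It != Probs.end())
    return It->second;
  return {1, NumSuccessors}; // uniform fallback for unannotated edges
}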
+ for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) + continue; + if (calcMetadataWeights(BB)) + continue; + if (calcColdCallHeuristics(BB)) + continue; + if (calcLoopBranchHeuristics(BB, LI)) + continue; + if (calcPointerHeuristics(BB)) + continue; + if (calcZeroHeuristics(BB)) + continue; + if (calcFloatingPointHeuristics(BB)) + continue; + calcInvokeHeuristics(BB); + } + + PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); +} + +void BranchProbabilityInfoWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addRequired<LoopInfoWrapperPass>(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { + const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + BPI.calculate(F, LI); + return false; +} + +void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } + +void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BPI.print(OS); +} diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp index e15109b..0dfd57d 100644 --- a/contrib/llvm/lib/Analysis/CFG.cpp +++ b/contrib/llvm/lib/Analysis/CFG.cpp @@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F, /// and return its position in the terminator instruction's list of /// successors. It is an error to call this with a block that is not a /// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { - TerminatorInst *Term = BB->getTerminator(); +unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, + const BasicBlock *Succ) { + const TerminatorInst *Term = BB->getTerminator(); #ifndef NDEBUG unsigned e = Term->getNumSuccessors(); #endif @@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. - for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E; + ++I) { if (&*I == B) return true; } diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp index fe1c088..4843ed6 100644 --- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp @@ -27,18 +27,17 @@ // time. 
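Stepping back from the individual files: the conversion repeated throughout this patch (BasicAA, BlockFrequencyInfo, BranchProbabilityInfo, and CFLAA below) turns each analysis into a plain result class, with a thin legacy pass that owns and recomputes it. A skeletal sketch of that split (plain C++, all names illustrative):

struct FunctionIR; // stand-in for the IR unit being analyzed

class AnalysisResult {
public:
  void calculate(const FunctionIR &F) { /* rebuild internal tables */ }
  void releaseMemory() { /* drop internal tables */ }
};

class AnalysisWrapperPass {
  AnalysisResult Result; // owned here, handed out to clients by reference
public:
  bool runOnFunction(const FunctionIR &F) {
    Result.calculate(F);
    return false; // analyses do not mutate the IR
  }
  AnalysisResult &getResult() { return Result; }
  void releaseMemory() { Result.releaseMemory(); }
};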
//===----------------------------------------------------------------------===// +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "StratifiedSets.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -47,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> -#include <forward_list> #include <memory> #include <tuple> @@ -55,6 +53,19 @@ using namespace llvm; #define DEBUG_TYPE "cfl-aa" +CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {} +CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + +// \brief Information we have about a function and would like to keep around +struct CFLAAResult::FunctionInfo { + StratifiedSets<Value *> Sets; + // Lots of functions have < 4 returns. Adjust as necessary. + SmallVector<Value *, 4> ReturnedValues; + + FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) + : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} +}; + // Try to go from a Value* to a Function*. Never returns nullptr. static Optional<Function *> parentFunctionOfValue(Value *); @@ -141,129 +152,13 @@ struct Edge { : From(From), To(To), Weight(W), AdditionalAttrs(A) {} }; -// \brief Information we have about a function and would like to keep around -struct FunctionInfo { - StratifiedSets<Value *> Sets; - // Lots of functions have < 4 returns. Adjust as necessary. - SmallVector<Value *, 4> ReturnedValues; - - FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) - : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} -}; - -struct CFLAliasAnalysis; - -struct FunctionHandle : public CallbackVH { - FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) - : CallbackVH(Fn), CFLAA(CFLAA) { - assert(Fn != nullptr); - assert(CFLAA != nullptr); - } - - ~FunctionHandle() override {} - - void deleted() override { removeSelfFromCache(); } - void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - -private: - CFLAliasAnalysis *CFLAA; - - void removeSelfFromCache(); -}; - -struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { -private: - /// \brief Cached mapping of Functions to their StratifiedSets. - /// If a function's sets are currently being built, it is marked - /// in the cache as an Optional without a value. This way, if we - /// have any kind of recursion, it is discernable from a function - /// that simply has empty sets. - DenseMap<Function *, Optional<FunctionInfo>> Cache; - std::forward_list<FunctionHandle> Handles; - -public: - static char ID; - - CFLAliasAnalysis() : ImmutablePass(ID) { - initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - ~CFLAliasAnalysis() override {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - } - - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - - /// \brief Inserts the given Function into the cache. 
- void scan(Function *Fn); - - void evict(Function *Fn) { Cache.erase(Fn); } - - /// \brief Ensures that the given function is available in the cache. - /// Returns the appropriate entry from the cache. - const Optional<FunctionInfo> &ensureCached(Function *Fn) { - auto Iter = Cache.find(Fn); - if (Iter == Cache.end()) { - scan(Fn); - Iter = Cache.find(Fn); - assert(Iter != Cache.end()); - assert(Iter->second.hasValue()); - } - return Iter->second; - } - - AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - if (LocA.Ptr == LocB.Ptr) { - if (LocA.Size == LocB.Size) { - return MustAlias; - } else { - return PartialAlias; - } - } - - // Comparisons between global variables and other constants should be - // handled by BasicAA. - // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing - // a GlobalValue and ConstantExpr, but every query needs to have at least - // one Value tied to a Function, and neither GlobalValues nor ConstantExprs - // are. - if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { - return AliasAnalysis::alias(LocA, LocB); - } - - AliasResult QueryResult = query(LocA, LocB); - if (QueryResult == MayAlias) - return AliasAnalysis::alias(LocA, LocB); - - return QueryResult; - } - - bool doInitialization(Module &M) override; -}; - -void FunctionHandle::removeSelfFromCache() { - assert(CFLAA != nullptr); - auto *Val = getValPtr(); - CFLAA->evict(cast<Function>(Val)); - setValPtr(nullptr); -} - // \brief Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { - CFLAliasAnalysis &AA; + CFLAAResult &AA; SmallVectorImpl<Edge> &Output; public: - GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output) + GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output) : AA(AA), Output(Output) {} void visitInstruction(Instruction &) { @@ -480,6 +375,8 @@ public: } template <typename InstT> void visitCallLikeInst(InstT &Inst) { + // TODO: Add support for noalias args/all the other fun function attributes + // that we can tack on. SmallVector<Function *, 4> Targets; if (getPossibleTargets(&Inst, Targets)) { if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) @@ -488,8 +385,16 @@ public: Output.clear(); } + // Because the function is opaque, we need to note that anything + // could have happened to the arguments, and that the result could alias + // just about anything, too. + // The goal of the loop is in part to unify many Values into one set, so we + // don't care if the function is void there. for (Value *V : Inst.arg_operands()) Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); + if (Inst.getNumArgOperands() == 0 && + Inst.getType() != Type::getVoidTy(Inst.getContext())) + Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); } void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } @@ -624,7 +529,7 @@ public: // ----- Various Edge iterators for the graph ----- // // \brief Iterator for edges. Because this graph is bidirected, we don't - // allow modificaiton of the edges using this iterator. Additionally, the + // allow modification of the edges using this iterator. Additionally, the // iterator becomes invalid if you add edges to or from the node you're // getting the edges of. 
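An aside on the cache machinery being moved into CFLAAResult in these hunks: per the convention the original class comment describes, an entry that exists but holds no value marks a function whose sets are still being built, which is how recursion is distinguished from a genuinely empty result. A reduced sketch of that discipline (plain C++17; Info, Cache, and the int key are illustrative):

#include <map>
#include <optional>
#include <utility>

struct Info { /* per-function summary */ };

class Cache {
  std::map<int, std::optional<Info>> Entries; // keyed by function id
public:
  const std::optional<Info> &ensure(int Fn) {
    auto It = Entries.find(Fn);
    if (It == Entries.end()) {
      Entries[Fn]; // empty optional: "being built", visible to recursion
      Info Built = build(Fn);
      It = Entries.find(Fn);
      It->second = std::move(Built);
    }
    return It->second;
  }
private:
  Info build(int) { return Info{}; } // may re-enter ensure() for callees
};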
struct EdgeIterator : public std::iterator<std::forward_iterator_tag, @@ -727,16 +632,6 @@ typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT; typedef DenseMap<Value *, GraphT::Node> NodeMapT; } -// -- Setting up/registering CFLAA pass -- // -char CFLAliasAnalysis::ID = 0; - -INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", - "CFL-Based AA implementation", false, true, false) - -ImmutablePass *llvm::createCFLAliasAnalysisPass() { - return new CFLAliasAnalysis(); -} - //===----------------------------------------------------------------------===// // Function declarations that require types defined in the namespace above //===----------------------------------------------------------------------===// @@ -751,12 +646,10 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val); static EdgeType flipWeight(EdgeType); // Gets edges of the given Instruction*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, Instruction *, - SmallVectorImpl<Edge> &); +static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &); // Gets edges of the given ConstantExpr*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *, - SmallVectorImpl<Edge> &); +static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &); // Gets the "Level" that one should travel in StratifiedSets // given an EdgeType. @@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType); // Builds the graph needed for constructing the StratifiedSets for the // given function -static void buildGraphFrom(CFLAliasAnalysis &, Function *, +static void buildGraphFrom(CFLAAResult &, Function *, SmallVectorImpl<Value *> &, NodeMapT &, GraphT &); // Gets the edges of a ConstantExpr as if it was an Instruction. This // function also acts on any nested ConstantExprs, adding the edges // of those to the given SmallVector as well. -static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, +static void constexprToEdges(CFLAAResult &, ConstantExpr &, SmallVectorImpl<Edge> &); // Given an Instruction, this will add it to the graph, along with any @@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 // addInstructionToGraph would add both the `load` and `getelementptr` // instructions to the graph appropriately. -static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &, +static void addInstructionToGraph(CFLAAResult &, Instruction &, SmallVectorImpl<Value *> &, NodeMapT &, GraphT &); // Notes whether it would be pointless to add the given Value to our sets. static bool canSkipAddingToSets(Value *Val); -// Builds the graph + StratifiedSets for a function. -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); - static Optional<Function *> parentFunctionOfValue(Value *Val) { if (auto *Inst = dyn_cast<Instruction>(Val)) { auto *Bb = Inst->getParent(); @@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) { } static bool hasUsefulEdges(ConstantExpr *CE) { - // ConstantExpr doens't have terminators, invokes, or fences, so only needs + // ConstantExpr doesn't have terminators, invokes, or fences, so only needs // to check for compares. 
return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; @@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) { llvm_unreachable("Incomplete coverage of EdgeType enum"); } -static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, +static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst, SmallVectorImpl<Edge> &Output) { assert(hasUsefulEdges(Inst) && "Expected instructions to have 'useful' edges"); @@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, v.visit(Inst); } -static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE, +static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE, SmallVectorImpl<Edge> &Output) { assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges"); GetEdgesVisitor v(Analysis, Output); @@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) { llvm_unreachable("Incomplete switch coverage"); } -static void constexprToEdges(CFLAliasAnalysis &Analysis, +static void constexprToEdges(CFLAAResult &Analysis, ConstantExpr &CExprToCollapse, SmallVectorImpl<Edge> &Results) { SmallVector<ConstantExpr *, 4> Worklist; @@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis, } } -static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, +static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst, SmallVectorImpl<Value *> &ReturnedValues, NodeMapT &Map, GraphT &Graph) { const auto findOrInsertNode = [&Map, &Graph](Value *Val) { @@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, // buy us much that we don't already have. I'd like to add interprocedural // analysis prior to this however, in case that somehow requires the graph // produced by this for efficient execution -static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, +static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn, SmallVectorImpl<Value *> &ReturnedValues, NodeMapT &Map, GraphT &Graph) { for (auto &Bb : Fn->getBasicBlockList()) @@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) { return false; } -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { +// Builds the graph + StratifiedSets for a function. +CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) { NodeMapT Map; GraphT Graph; SmallVector<Value *, 4> ReturnedValues; - buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); + buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph); DenseMap<GraphT::Node, Value *> NodeValueMap; NodeValueMap.resize(Map.size()); @@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { return FunctionInfo(Builder.build(), std::move(ReturnedValues)); } -void CFLAliasAnalysis::scan(Function *Fn) { +void CFLAAResult::scan(Function *Fn) { auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); - FunctionInfo Info(buildSetsFrom(*this, Fn)); + FunctionInfo Info(buildSetsFrom(Fn)); Cache[Fn] = std::move(Info); Handles.push_front(FunctionHandle(Fn, this)); } -AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } + +/// \brief Ensures that the given function is available in the cache. +/// Returns the appropriate entry from the cache. 
+const Optional<CFLAAResult::FunctionInfo> & +CFLAAResult::ensureCached(Function *Fn) { + auto Iter = Cache.find(Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; +} + +AliasResult CFLAAResult::query(const MemoryLocation &LocA, + const MemoryLocation &LocB) { auto *ValA = const_cast<Value *>(LocA.Ptr); auto *ValB = const_cast<Value *>(LocB.Ptr); @@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, return NoAlias; } -bool CFLAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) { + return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} + +char CFLAA::PassID; + +char CFLAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) + +ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } + +CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { + initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool CFLAAWrapperPass::doInitialization(Module &M) { + Result.reset( + new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; +} + +bool CFLAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index e2799d9..7cec962 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -22,7 +22,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(new CallGraphNode(nullptr)) { + CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) { // Add every function to the call graph. for (Function &F : M) addToCallGraph(&F); @@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M) Root = ExternalCallingNode; } +CallGraph::CallGraph(CallGraph &&Arg) + : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), + ExternalCallingNode(Arg.ExternalCallingNode), + CallsExternalNode(std::move(Arg.CallsExternalNode)) { + Arg.FunctionMap.clear(); + Arg.Root = nullptr; + Arg.ExternalCallingNode = nullptr; +} + CallGraph::~CallGraph() { // CallsExternalNode is not in the function map, delete it explicitly. - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; + if (CallsExternalNode) + CallsExternalNode->allReferencesDropped(); // Reset all node's use counts to zero before deleting them to prevent an // assertion from firing. @@ -43,8 +52,6 @@ CallGraph::~CallGraph() { for (auto &I : FunctionMap) I.second->allReferencesDropped(); #endif - for (auto &I : FunctionMap) - delete I.second; } void CallGraph::addToCallGraph(Function *F) { @@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) { // If this function is not defined in this translation unit, it could call // anything. 
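The CallGraph.cpp hunks that begin here swap manual new/delete for map-owned nodes, and the get-or-insert path in the rest of the file reduces to a standard idiom over a map of unique_ptr. A minimal sketch (plain C++; Graph and Node are illustrative):

#include <map>
#include <memory>

struct Node { explicit Node(int F) : Fn(F) {} int Fn; };

class Graph {
  std::map<int, std::unique_ptr<Node>> Map;
public:
  Node *getOrInsert(int Fn) {
    auto &Slot = Map[Fn]; // default-constructs a null slot on first use
    if (!Slot)
      Slot = std::make_unique<Node>(Fn);
    return Slot.get(); // callers borrow; the map owns the node
  }
};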
if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode); + Node->addCalledFunction(CallSite(), CallsExternalNode.get()); // Look for calls by this function. for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) @@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) { // Indirect calls of intrinsics are not allowed so no need to check. // We can be more precise here by using TargetArg returned by // Intrinsic::isLeaf. - Node->addCalledFunction(CS, CallsExternalNode); + Node->addCalledFunction(CS, CallsExternalNode.get()); else if (!Callee->isIntrinsic()) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } @@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const { Nodes.reserve(FunctionMap.size()); for (auto I = begin(), E = end(); I != E; ++I) - Nodes.push_back(I->second); + Nodes.push_back(I->second.get()); std::sort(Nodes.begin(), Nodes.end(), [](CallGraphNode *LHS, CallGraphNode *RHS) { @@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const { CN->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); } -#endif // removeFunctionFromModule - Unlink the function from this module, returning // it. Because this removes the function from the module, the call graph node @@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { assert(CGN->empty() && "Cannot remove function from call " "graph if it references other functions!"); Function *F = CGN->getFunction(); // Get the function for the call graph node - delete CGN; // Delete the call graph node for this func FunctionMap.erase(F); // Remove the call graph node from the map M.getFunctionList().remove(F); @@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { "Pointing CallGraphNode at a function that already exists"); FunctionMapTy::iterator I = FunctionMap.find(From); I->second->F = const_cast<Function*>(To); - FunctionMap[To] = I->second; + FunctionMap[To] = std::move(I->second); FunctionMap.erase(I); } @@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { // it will insert a new CallGraphNode for the specified function if one does // not already exist. CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { - CallGraphNode *&CGN = FunctionMap[F]; + auto &CGN = FunctionMap[F]; if (CGN) - return CGN; + return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - return CGN = new CallGraphNode(const_cast<Function*>(F)); + CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F)); + return CGN.get(); } //===----------------------------------------------------------------------===// @@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } -#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. 
Note that this method takes linear time, so it @@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { G->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } -#endif diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index 07b389a..07b389a 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp index 68dcd3c..68dcd3c 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index 52ef807..1add2fa 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -52,63 +53,6 @@ namespace { bool Captured; }; - struct NumberedInstCache { - SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; - BasicBlock::const_iterator LastInstFound; - unsigned LastInstPos; - const BasicBlock *BB; - - NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) { - LastInstFound = BB->end(); - } - - /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out - /// instruction while walking 'BB'. - const Instruction *find(const Instruction *A, const Instruction *B) { - const Instruction *Inst = nullptr; - assert(!(LastInstFound == BB->end() && LastInstPos != 0) && - "Instruction supposed to be in NumberedInsts"); - - // Start the search with the instruction found in the last lookup round. - auto II = BB->begin(); - auto IE = BB->end(); - if (LastInstFound != IE) - II = std::next(LastInstFound); - - // Number all instructions up to the point where we find 'A' or 'B'. - for (++LastInstPos; II != IE; ++II, ++LastInstPos) { - Inst = cast<Instruction>(II); - NumberedInsts[Inst] = LastInstPos; - if (Inst == A || Inst == B) - break; - } - - assert(II != IE && "Instruction not found?"); - LastInstFound = II; - return Inst; - } - - /// \brief Find out whether 'A' dominates 'B', meaning whether 'A' - /// comes before 'B' in 'BB'. This is a simplification that considers - /// cached instruction positions and ignores other basic blocks, being - /// only relevant to compare relative instructions positions inside 'BB'. - bool dominates(const Instruction *A, const Instruction *B) { - assert(A->getParent() == B->getParent() && - "Instructions must be in the same basic block!"); - - unsigned NA = NumberedInsts.lookup(A); - unsigned NB = NumberedInsts.lookup(B); - if (NA && NB) - return NA < NB; - if (NA) - return true; - if (NB) - return false; - - return A == find(A, B); - } - }; - /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. 
/// Only support the case where the Value is defined in the same basic block struct CapturesBefore : public CaptureTracker { CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, - bool IncludeI) - : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT), + bool IncludeI, OrderedBasicBlock *IC) + : OrderedBB(IC), BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} void tooManyUses() override { Captured = true; } @@ -131,18 +75,18 @@ namespace { // Compute the case where both instructions are inside the same basic // block. Since instructions in the same BB as BeforeHere are numbered in - // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable' + // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable' // which are very expensive for large basic blocks. if (BB == BeforeHere->getParent()) { // 'I' dominates 'BeforeHere' => not safe to prune. // - // The value defined by an invoke dominates an instruction only if it - // dominates every instruction in UseBB. A PHI is dominated only if - // the instruction dominates every possible use in the UseBB. Since + // The value defined by an invoke dominates an instruction only + // if it dominates every instruction in UseBB. A PHI is dominated only + // if the instruction dominates every possible use in the UseBB. Since + // UseBB == BB, avoid pruning. if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere) return false; - if (!LocalInstCache.dominates(BeforeHere, I)) + if (!OrderedBB->dominates(BeforeHere, I)) return false; // 'BeforeHere' comes before 'I', it's safe to prune if we also @@ -157,10 +101,7 @@ namespace { SmallVector<BasicBlock*, 32> Worklist; Worklist.append(succ_begin(BB), succ_end(BB)); - if (!isPotentiallyReachableFromMany(Worklist, BB, DT)) - return true; - - return false; + return !isPotentiallyReachableFromMany(Worklist, BB, DT); } // If the value is defined in the same basic block as use and BeforeHere, @@ -196,7 +137,7 @@ namespace { return true; } - NumberedInstCache LocalInstCache; + OrderedBasicBlock *OrderedBB; const Instruction *BeforeHere; DominatorTree *DT; @@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V, /// returning the value (or part of it) from the function counts as capturing /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it -/// or not. +/// or not. An ordered basic block \p OBB can be used in order to speed up +/// queries about relative order among instructions in the same basic block. bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI) { + DominatorTree *DT, bool IncludeI, + OrderedBasicBlock *OBB) { assert(!isa<GlobalValue>(V) && "It doesn't make sense to ask whether a global is captured."); + bool UseNewOBB = OBB == nullptr; if (!DT) return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); + if (UseNewOBB) + OBB = new OrderedBasicBlock(I->getParent()); // TODO: See comment in PointerMayBeCaptured regarding what could be done // with StoreCaptures. 
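What OrderedBasicBlock (replacing the hand-rolled NumberedInstCache above) provides is a lazily built numbering of a block's instructions, so repeated "does A come before B?" queries stop costing a linear scan each time. A reduced sketch of the idea (plain C++; the int "instructions" stand in for real ones):

#include <cstddef>
#include <unordered_map>
#include <vector>

class OrderedList {
  const std::vector<int> &Insts; // stand-in for a basic block
  std::unordered_map<int, std::size_t> Position; // filled lazily
  std::size_t NextToNumber = 0;
public:
  explicit OrderedList(const std::vector<int> &I) : Insts(I) {}

  // Numbers instructions only as far as needed to place both A and B,
  // then compares their cached positions. Both must be in the block.
  bool comesBefore(int A, int B) {
    while ((!Position.count(A) || !Position.count(B)) &&
           NextToNumber < Insts.size()) {
      Position.emplace(Insts[NextToNumber], NextToNumber);
      ++NextToNumber;
    }
    return Position.at(A) < Position.at(B);
  }
};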
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB); PointerMayBeCaptured(V, &CB); + + if (UseNewOBB) + delete OBB; return CB.Captured; } @@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { // that loading a value from a pointer does not cause the pointer to be // captured, even though the loaded value might be the pointer itself // (think of self-referential objects). - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) + CallSite::data_operand_iterator B = + CS.data_operands_begin(), E = CS.data_operands_end(); + for (CallSite::data_operand_iterator A = B; A != E; ++A) if (A->get() == V && !CS.doesNotCapture(A - B)) // The parameter is not marked 'nocapture' - captured. if (Tracker->captured(U)) diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index 46a2c43..4090b4c 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet, continue; // If all uses of this value are ephemeral, then so is this value. - bool FoundNEUse = false; - for (const User *I : V->users()) - if (!EphValues.count(I)) { - FoundNEUse = true; - break; - } - - if (FoundNEUse) + if (!std::all_of(V->user_begin(), V->user_end(), + [&](const User *U) { return EphValues.count(U); })) continue; EphValues.insert(V); @@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Skip ephemeral values. - if (EphValues.count(II)) + if (EphValues.count(&*II)) continue; // Special handling for calls. @@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; + if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB)) + notDuplicatable = true; + if (const CallInst *CI = dyn_cast<CallInst>(II)) if (CI->cannotDuplicate()) notDuplicatable = true; diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 02a5aef..ccb5663 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || - CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::AddrSpaceCast) + CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) @@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, if (GV->isConstant() && GV->hasDefinitiveInitializer()) return GV->getInitializer(); + if (auto *GA = dyn_cast<GlobalAlias>(C)) + if (GA->getAliasee() && !GA->mayBeOverridden()) + return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL); + // If the loaded value isn't a constant expr, we can't handle it. 
ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) @@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::sqrt: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: @@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) { // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. switch (Name[0]) { - default: return false; + default: + return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || + Name == "atan2" || Name == "acosf" || Name == "asinf" || + Name == "atanf" || Name == "atan2f"; case 'c': - return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + return Name == "ceil" || Name == "cos" || Name == "cosh" || + Name == "ceilf" || Name == "cosf" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2"; + return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "fmod" || Name == "floor"; + return Name == "fabs" || Name == "floor" || Name == "fmod" || + Name == "fabsf" || Name == "floorf" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10"; + return Name == "log" || Name == "log10" || Name == "logf" || + Name == "log10f"; case 'p': - return Name == "pow"; + return Name == "pow" || Name == "powf"; case 's': return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sqrtf"; + Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh"; + return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; } } @@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), V); } + if (IntrinsicID == Intrinsic::floor) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::ceil) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::trunc) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::rint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::nearbyint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. 
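The rounding folds added in the hunk above deserve a note: each intrinsic is folded by rounding the constant operand to an integral value in that intrinsic's own rounding mode, with no host libm call involved. A sketch of the mapping in plain C++ (assuming the default FE_TONEAREST environment, where nearbyint rounds ties to even; the dispatcher is mine, not the LLVM code):

#include <cmath>
#include <cstring>

// Returns true and sets Out when the named rounding intrinsic can be
// folded for the constant operand Op.
bool foldRounding(const char *Name, double Op, double &Out) {
  if (!std::strcmp(Name, "floor")) { Out = std::floor(Op); return true; } // toward -inf
  if (!std::strcmp(Name, "ceil"))  { Out = std::ceil(Op);  return true; } // toward +inf
  if (!std::strcmp(Name, "trunc")) { Out = std::trunc(Op); return true; } // toward zero
  if (!std::strcmp(Name, "rint") || !std::strcmp(Name, "nearbyint")) {
    Out = std::nearbyint(Op); // nearest, ties to even under the default mode
    return true;
  }
  return false;
}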
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: return ConstantFoldFP(exp2, V, Ty); - case Intrinsic::floor: - return ConstantFoldFP(floor, V, Ty); - case Intrinsic::ceil: - return ConstantFoldFP(ceil, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, switch (Name[0]) { case 'a': - if (Name == "acos" && TLI->has(LibFunc::acos)) + if ((Name == "acos" && TLI->has(LibFunc::acos)) || + (Name == "acosf" && TLI->has(LibFunc::acosf))) return ConstantFoldFP(acos, V, Ty); - else if (Name == "asin" && TLI->has(LibFunc::asin)) + else if ((Name == "asin" && TLI->has(LibFunc::asin)) || + (Name == "asinf" && TLI->has(LibFunc::asinf))) return ConstantFoldFP(asin, V, Ty); - else if (Name == "atan" && TLI->has(LibFunc::atan)) + else if ((Name == "atan" && TLI->has(LibFunc::atan)) || + (Name == "atanf" && TLI->has(LibFunc::atanf))) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if (Name == "ceil" && TLI->has(LibFunc::ceil)) + if ((Name == "ceil" && TLI->has(LibFunc::ceil)) || + (Name == "ceilf" && TLI->has(LibFunc::ceilf))) return ConstantFoldFP(ceil, V, Ty); - else if (Name == "cos" && TLI->has(LibFunc::cos)) + else if ((Name == "cos" && TLI->has(LibFunc::cos)) || + (Name == "cosf" && TLI->has(LibFunc::cosf))) return ConstantFoldFP(cos, V, Ty); - else if (Name == "cosh" && TLI->has(LibFunc::cosh)) + else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) || + (Name == "coshf" && TLI->has(LibFunc::coshf))) return ConstantFoldFP(cosh, V, Ty); - else if (Name == "cosf" && TLI->has(LibFunc::cosf)) - return ConstantFoldFP(cos, V, Ty); break; case 'e': - if (Name == "exp" && TLI->has(LibFunc::exp)) + if ((Name == "exp" && TLI->has(LibFunc::exp)) || + (Name == "expf" && TLI->has(LibFunc::expf))) return ConstantFoldFP(exp, V, Ty); - - if (Name == "exp2" && TLI->has(LibFunc::exp2)) { + if ((Name == "exp2" && TLI->has(LibFunc::exp2)) || + (Name == "exp2f" && TLI->has(LibFunc::exp2f))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
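// --- Illustrative aside, not part of this patch ---
// exp2 is C99-only and missing from some host C libraries, so the fold below
// routes through pow instead; pow(2, x) == exp2(x) for the finite inputs we
// fold (see the finite-arguments comment above). Standalone equivalent:
#include <cmath>
static double hostExp2(double X) { return std::pow(2.0, X); }
// --- end aside ---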
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); - } break; case 'f': - if (Name == "fabs" && TLI->has(LibFunc::fabs)) + if ((Name == "fabs" && TLI->has(LibFunc::fabs)) || + (Name == "fabsf" && TLI->has(LibFunc::fabsf))) return ConstantFoldFP(fabs, V, Ty); - else if (Name == "floor" && TLI->has(LibFunc::floor)) + else if ((Name == "floor" && TLI->has(LibFunc::floor)) || + (Name == "floorf" && TLI->has(LibFunc::floorf))) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) + if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) || + (Name == "logf" && V > 0 && TLI->has(LibFunc::logf))) return ConstantFoldFP(log, V, Ty); - else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f))) return ConstantFoldFP(log10, V, Ty); else if (IntrinsicID == Intrinsic::sqrt && (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } break; case 's': - if (Name == "sin" && TLI->has(LibFunc::sin)) + if ((Name == "sin" && TLI->has(LibFunc::sin)) || + (Name == "sinf" && TLI->has(LibFunc::sinf))) return ConstantFoldFP(sin, V, Ty); - else if (Name == "sinh" && TLI->has(LibFunc::sinh)) + else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) || + (Name == "sinhf" && TLI->has(LibFunc::sinhf))) return ConstantFoldFP(sinh, V, Ty); - else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) - return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) + else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) || + (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sinf" && TLI->has(LibFunc::sinf)) - return ConstantFoldFP(sin, V, Ty); break; case 't': - if (Name == "tan" && TLI->has(LibFunc::tan)) + if ((Name == "tan" && TLI->has(LibFunc::tan)) || + (Name == "tanf" && TLI->has(LibFunc::tanf))) return ConstantFoldFP(tan, V, Ty); - else if (Name == "tanh" && TLI->has(LibFunc::tanh)) + else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) || + (Name == "tanhf" && TLI->has(LibFunc::tanhf))) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (!TLI) return nullptr; - if (Name == "pow" && TLI->has(LibFunc::pow)) + if ((Name == "pow" && TLI->has(LibFunc::pow)) || + (Name == "powf" && TLI->has(LibFunc::powf))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if (Name == "fmod" && TLI->has(LibFunc::fmod)) + if ((Name == "fmod" && TLI->has(LibFunc::fmod)) || + (Name == "fmodf" && TLI->has(LibFunc::fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if (Name == "atan2" && TLI->has(LibFunc::atan2)) + if ((Name == "atan2" && TLI->has(LibFunc::atan2)) || + (Name == "atan2f" && TLI->has(LibFunc::atan2f))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index b529c1a..0383cbf 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector<int, 16> 
ActualMask = SI->getShuffleMask(); - if (Mask != ActualMask) - return false; - - return true; + return Mask == ActualMask; } static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, @@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { return -1; switch (I->getOpcode()) { - case Instruction::GetElementPtr:{ - Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); - return TTI->getAddressComputationCost(ValTy); - } + case Instruction::GetElementPtr: + return TTI->getUserCost(I); case Instruction::Ret: case Instruction::PHI: @@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - SmallVector<Type*, 4> Tys; + SmallVector<Value *, 4> Args; for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) - Tys.push_back(II->getArgOperand(J)->getType()); + Args.push_back(II->getArgOperand(J)); return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), - Tys); + Args); } return -1; default: @@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { - Instruction *Inst = it; + Instruction *Inst = &*it; unsigned Cost = getInstructionCost(Inst); if (Cost != (unsigned)-1) OS << "Cost Model: Found an estimated cost of " << Cost; diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp index 9d15786..baee8b3 100644 --- a/contrib/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -60,12 +60,12 @@ public: void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<LoopInfoWrapperPass>(); - AU.addRequired<ScalarEvolution>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); } bool Delinearization::runOnFunction(Function &F) { this->F = &F; - SE = &getAnalysis<ScalarEvolution>(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); return false; } @@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const { if (!BasePointer) break; AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); - - // Do not try to delinearize memory accesses that are not AddRecs. - if (!AR) - break; - O << "\n"; O << "Inst:" << *Inst << "\n"; O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; - O << "AddRec: " << *AR << "\n"; + O << "AccessFunction: " << *AccessFn << "\n"; SmallVector<const SCEV *, 3> Subscripts, Sizes; - SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst)); + SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst)); if (Subscripts.size() == 0 || Sizes.size() == 0 || Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp new file mode 100644 index 0000000..912c5ce --- /dev/null +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -0,0 +1,392 @@ +//===---- DemandedBits.cpp - Determine demanded bits ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass implements a demanded bits analysis. A demanded bit is one that +// contributes to a result; bits that are not demanded can be either zero or +// one without affecting control or data flow. For example in this sequence: +// +// %1 = add i32 %x, %y +// %2 = trunc i32 %1 to i16 +// +// Only the lowest 16 bits of %1 are demanded; the rest are removed by the +// trunc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "demanded-bits" + +char DemandedBits::ID = 0; +INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis", + false, false) + +DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) { + initializeDemandedBitsPass(*PassRegistry::getPassRegistry()); +} + +void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.setPreservesAll(); +} + +static bool isAlwaysLive(Instruction *I) { + return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || + I->isEHPad() || I->mayHaveSideEffects(); +} + +void DemandedBits::determineLiveOperandBits( + const Instruction *UserI, const Instruction *I, unsigned OperandNo, + const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2) { + unsigned BitWidth = AB.getBitWidth(); + + // We're called once per operand, but for some instructions, we need to + // compute known bits of both operands in order to determine the live bits of + // either (when both operands are instructions themselves). We don't, + // however, want to do this twice, so we cache the result in APInts that live + // in the caller. For the two-relevant-operands case, both operand values are + // provided here. 
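// --- Illustrative aside, not part of this patch ---
// Before the big switch below, one orienting example: the file header's trunc
// case corresponds to the simplest transfer function in that switch. The
// alive bits of a trunc's input are the demanded output bits zero-extended to
// the input width (hypothetical helper name):
#include "llvm/ADT/APInt.h"
static llvm::APInt aliveTruncInput(const llvm::APInt &AOut, unsigned InBits) {
  // %1 = add i32 %x, %y ; %2 = trunc i32 %1 to i16
  // demanded(%2) = 0xFFFF  ==>  alive(%1) = 0x0000FFFF
  return AOut.zext(InBits);
}
// --- end aside ---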
+ auto ComputeKnownBits = + [&](unsigned BitWidth, const Value *V1, const Value *V2) { + const DataLayout &DL = I->getModule()->getDataLayout(); + KnownZero = APInt(BitWidth, 0); + KnownOne = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0, + AC, UserI, DT); + + if (V2) { + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL, + 0, AC, UserI, DT); + } + }; + + switch (UserI->getOpcode()) { + default: break; + case Instruction::Call: + case Instruction::Invoke: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: + // The alive bits of the input are the swapped alive bits of + // the output. + AB = AOut.byteSwap(); + break; + case Intrinsic::ctlz: + if (OperandNo == 0) { + // We need some output bits, so we need all bits of the + // input to the left of, and including, the leftmost bit + // known to be one. + ComputeKnownBits(BitWidth, I, nullptr); + AB = APInt::getHighBitsSet(BitWidth, + std::min(BitWidth, KnownOne.countLeadingZeros()+1)); + } + break; + case Intrinsic::cttz: + if (OperandNo == 0) { + // We need some output bits, so we need all bits of the + // input to the right of, and including, the rightmost bit + // known to be one. + ComputeKnownBits(BitWidth, I, nullptr); + AB = APInt::getLowBitsSet(BitWidth, + std::min(BitWidth, KnownOne.countTrailingZeros()+1)); + } + break; + } + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + // Find the highest live output bit. We don't need any more input + // bits than that (adds, and thus subtracts, ripple only to the + // left). + AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits()); + break; + case Instruction::Shl: + if (OperandNo == 0) + if (ConstantInt *CI = + dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.lshr(ShiftAmt); + + // If the shift is nuw/nsw, then the high bits are not dead + // (because we've promised that they *must* be zero). + const ShlOperator *S = cast<ShlOperator>(UserI); + if (S->hasNoSignedWrap()) + AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (S->hasNoUnsignedWrap()) + AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::LShr: + if (OperandNo == 0) + if (ConstantInt *CI = + dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.shl(ShiftAmt); + + // If the shift is exact, then the low bits are not dead + // (they must be zero). + if (cast<LShrOperator>(UserI)->isExact()) + AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::AShr: + if (OperandNo == 0) + if (ConstantInt *CI = + dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.shl(ShiftAmt); + // Because the high input bit is replicated into the + // high-order bits of the result, if we need any of those + // bits, then we must keep the highest input bit. + if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) + .getBoolValue()) + AB.setBit(BitWidth-1); + + // If the shift is exact, then the low bits are not dead + // (they must be zero). 
+ if (cast<AShrOperator>(UserI)->isExact()) + AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::And: + AB = AOut; + + // For bits that are known zero, the corresponding bits in the + // other operand are dead (unless they're both zero, in which + // case they can't both be dead, so just mark the LHS bits as + // dead). + if (OperandNo == 0) { + ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + AB &= ~KnownZero2; + } else { + if (!isa<Instruction>(UserI->getOperand(0))) + ComputeKnownBits(BitWidth, UserI->getOperand(0), I); + AB &= ~(KnownZero & ~KnownZero2); + } + break; + case Instruction::Or: + AB = AOut; + + // For bits that are known one, the corresponding bits in the + // other operand are dead (unless they're both one, in which + // case they can't both be dead, so just mark the LHS bits as + // dead). + if (OperandNo == 0) { + ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + AB &= ~KnownOne2; + } else { + if (!isa<Instruction>(UserI->getOperand(0))) + ComputeKnownBits(BitWidth, UserI->getOperand(0), I); + AB &= ~(KnownOne & ~KnownOne2); + } + break; + case Instruction::Xor: + case Instruction::PHI: + AB = AOut; + break; + case Instruction::Trunc: + AB = AOut.zext(BitWidth); + break; + case Instruction::ZExt: + AB = AOut.trunc(BitWidth); + break; + case Instruction::SExt: + AB = AOut.trunc(BitWidth); + // Because the high input bit is replicated into the + // high-order bits of the result, if we need any of those + // bits, then we must keep the highest input bit. + if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), + AOut.getBitWidth() - BitWidth)) + .getBoolValue()) + AB.setBit(BitWidth-1); + break; + case Instruction::Select: + if (OperandNo != 0) + AB = AOut; + break; + case Instruction::ICmp: + // Count the number of leading zeroes in each operand. + ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes); + break; + } +} + +bool DemandedBits::runOnFunction(Function& Fn) { + F = &Fn; + Analyzed = false; + return false; +} + +void DemandedBits::performAnalysis() { + if (Analyzed) + // Analysis already completed for this function. + return; + Analyzed = true; + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + Visited.clear(); + AliveBits.clear(); + + SmallVector<Instruction*, 128> Worklist; + + // Collect the set of "root" instructions that are known live. + for (Instruction &I : instructions(*F)) { + if (!isAlwaysLive(&I)) + continue; + + DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n"); + // For integer-valued instructions, set up an initial empty set of alive + // bits and add the instruction to the work list. For other instructions + // add their operands to the work list (for integer values operands, mark + // all bits as live). + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + if (!AliveBits.count(&I)) { + AliveBits[&I] = APInt(IT->getBitWidth(), 0); + Worklist.push_back(&I); + } + + continue; + } + + // Non-integer-typed instructions... + for (Use &OI : I.operands()) { + if (Instruction *J = dyn_cast<Instruction>(OI)) { + if (IntegerType *IT = dyn_cast<IntegerType>(J->getType())) + AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); + Worklist.push_back(J); + } + } + // To save memory, we don't add I to the Visited set here. 
Instead, we + check isAlwaysLive on every instruction when searching for dead + instructions later (we need to check isAlwaysLive for the + integer-typed instructions anyway). + } + + // Propagate liveness backwards to operands. + while (!Worklist.empty()) { + Instruction *UserI = Worklist.pop_back_val(); + + DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI); + APInt AOut; + if (UserI->getType()->isIntegerTy()) { + AOut = AliveBits[UserI]; + DEBUG(dbgs() << " Alive Out: " << AOut); + } + DEBUG(dbgs() << "\n"); + + if (!UserI->getType()->isIntegerTy()) + Visited.insert(UserI); + + APInt KnownZero, KnownOne, KnownZero2, KnownOne2; + // Compute the set of alive bits for each operand. These are anded into the + // existing set, if any, and if that changes the set of alive bits, the + // operand is added to the work-list. + for (Use &OI : UserI->operands()) { + if (Instruction *I = dyn_cast<Instruction>(OI)) { + if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) { + unsigned BitWidth = IT->getBitWidth(); + APInt AB = APInt::getAllOnesValue(BitWidth); + if (UserI->getType()->isIntegerTy() && !AOut && + !isAlwaysLive(UserI)) { + // If all bits of the output are dead, then all bits of the input + // are dead as well. + AB = APInt(BitWidth, 0); + } else { + // Bits of each operand that are used to compute alive bits of the + // output are alive, all others are dead. + determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, + KnownZero, KnownOne, + KnownZero2, KnownOne2); + } + + // If we've added to the set of alive bits (or the operand has not + // been previously visited), then re-queue the operand to be visited + // again. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + + APInt ABNew = AB | ABPrev; + if (ABNew != ABPrev || ABI == AliveBits.end()) { + AliveBits[I] = std::move(ABNew); + Worklist.push_back(I); + } + } else if (!Visited.count(I)) { + Worklist.push_back(I); + } + } + } + } +} + +APInt DemandedBits::getDemandedBits(Instruction *I) { + performAnalysis(); + + const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); + if (AliveBits.count(I)) + return AliveBits[I]; + return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType())); +} + +bool DemandedBits::isInstructionDead(Instruction *I) { + performAnalysis(); + + return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() && + !isAlwaysLive(I); +} + +void DemandedBits::print(raw_ostream &OS, const Module *M) const { + // This is gross. But the alternative is making all the state mutable + // just because of this one debugging method.
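// --- Illustrative aside, not part of this patch ---
// The analysis is lazy: runOnFunction() only records the function, and the
// first query triggers performAnalysis(), which is why the const print()
// below needs a const_cast. A client sketch (hypothetical function name;
// assumes a pass that declared AU.addRequired<DemandedBits>() and the
// includes already in this file):
static void querySketch(DemandedBits &DB, Instruction *I, raw_ostream &OS) {
  APInt Demanded = DB.getDemandedBits(I); // computes on first use
  if (DB.isInstructionDead(I))
    OS << "dead: " << *I << "\n";
  else
    OS << "demanded mask: " << Demanded << " for " << *I << "\n";
}
// --- end aside ---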
+ const_cast<DemandedBits*>(this)->performAnalysis(); + for (auto &KV : AliveBits) { + OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " + << *KV.first << "\n"; + } +} + +FunctionPass *llvm::createDemandedBitsPass() { + return new DemandedBits(); +} diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index 4826ac4..4040ad3 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", "Dependence Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(DependenceAnalysis, "da", "Dependence Analysis", true, true) @@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() { bool DependenceAnalysis::runOnFunction(Function &F) { this->F = &F; - AA = &getAnalysis<AliasAnalysis>(); - SE = &getAnalysis<ScalarEvolution>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); return false; } @@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() { void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive<AliasAnalysis>(); - AU.addRequiredTransitive<ScalarEvolution>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); + AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); AU.addRequiredTransitive<LoopInfoWrapperPass>(); } @@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, : Dependence(Source, Destination), Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; - DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; + if (CommonLevels) + DV = make_unique<DVEntry[]>(CommonLevels); } // The rest are simple getters that hide the implementation. 
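// --- Illustrative aside, not part of this patch ---
// Two mechanical API migrations recur throughout the DependenceAnalysis hunks
// below, shown side by side (old form in comments; hypothetical helper names):
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
static llvm::APInt constantOf(const llvm::SCEVConstant *C) {
  return C->getAPInt(); // was: C->getValue()->getValue()
}
static const llvm::SCEV *zeroOf(llvm::ScalarEvolution *SE, llvm::Type *Ty) {
  return SE->getZero(Ty); // was: SE->getConstant(Ty, 0); getOne() likewise
}
// --- end aside ---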
@@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA, void DependenceAnalysis::Constraint::setDistance(const SCEV *D, const Loop *CurLoop) { Kind = Distance; - A = SE->getConstant(D->getType(), 1); + A = SE->getOne(D->getType()); B = SE->getNegativeSCEV(A); C = SE->getNegativeSCEV(D); AssociatedLoop = CurLoop; @@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, if (!C1B2_C2B1 || !C1A2_C2A1 || !A1B2_A2B1 || !A2B1_A1B2) return false; - APInt Xtop = C1B2_C2B1->getValue()->getValue(); - APInt Xbot = A1B2_A2B1->getValue()->getValue(); - APInt Ytop = C1A2_C2A1->getValue()->getValue(); - APInt Ybot = A2B1_A1B2->getValue()->getValue(); + APInt Xtop = C1B2_C2B1->getAPInt(); + APInt Xbot = A1B2_A2B1->getAPInt(); + APInt Ytop = C1A2_C2A1->getAPInt(); + APInt Ybot = A2B1_A1B2->getAPInt(); DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); @@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, } if (const SCEVConstant *CUB = collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { - APInt UpperBound = CUB->getValue()->getValue(); + APInt UpperBound = CUB->getAPInt(); DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { X->setEmpty(); @@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, const Value *B) { const Value *AObj = GetUnderlyingObject(A, DL); const Value *BObj = GetUnderlyingObject(B, DL); - return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), - BObj, AA->getTypeStoreSize(BObj->getType())); + return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), + BObj, DL.getTypeStoreSize(BObj->getType())); } @@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, // Can we compute distance? 
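// --- Illustrative aside, not part of this patch ---
// Strong SIV: with constant Delta and Coeff the dependence distance is
// Delta / Coeff, meaningful only when the division is exact, which the code
// below checks. APInt::sdivrem yields quotient and remainder in one call
// (hypothetical helper name):
#include "llvm/ADT/APInt.h"
static bool exactDistance(const llvm::APInt &Delta, const llvm::APInt &Coeff,
                          llvm::APInt &Distance) {
  llvm::APInt Remainder = Delta; // outputs must be initialized, as below
  Distance = Delta;
  llvm::APInt::sdivrem(Delta, Coeff, Distance, Remainder);
  return Remainder == 0; // non-zero remainder ==> no dependence
}
// --- end aside ---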
if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { - APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); - APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); + APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); + APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); APInt Distance = ConstDelta; // these need to be initialized APInt Remainder = ConstDelta; APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); @@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); // compute SplitIter for use by DependenceAnalysis::getSplitIteration() - SplitIter = - SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), - Delta), - SE->getMulExpr(SE->getConstant(Delta->getType(), 2), - ConstCoeff)); + SplitIter = SE->getUDivExpr( + SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); @@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, return true; } Result.DV[Level].Splitable = false; - Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + Result.DV[Level].Distance = SE->getZero(Delta->getType()); return false; } } // check that Coeff divides Delta - APInt APDelta = ConstDelta->getValue()->getValue(); - APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt APDelta = ConstDelta->getAPInt(); + APInt APCoeff = ConstCoeff->getAPInt(); APInt Distance = APDelta; // these need to be initialized APInt Remainder = APDelta; APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); @@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactSIVindependence; ++ExactSIVsuccesses; @@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *CUB = collectConstantUpperBound(CurLoop, Delta->getType())) { - UM = CUB->getValue()->getValue(); + UM = CUB->getAPInt(); DEBUG(dbgs() << "\t UM = " << UM << "\n"); UMvalid = true; } @@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, static bool isRemainderZero(const SCEVConstant *Dividend, const SCEVConstant *Divisor) { - APInt ConstDividend = Dividend->getValue()->getValue(); - APInt ConstDivisor = Divisor->getValue()->getValue(); + APInt ConstDividend = Dividend->getAPInt(); + APInt ConstDivisor = Divisor->getAPInt(); return ConstDividend.srem(ConstDivisor) == 0; } @@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); - NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), - DstCoeff, Delta, CurLoop); + NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if
(isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { if (Level < CommonLevels) { @@ -1775,8 +1774,8 @@ bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), - Delta, CurLoop); + NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { if (Level < CommonLevels) { @@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactRDIVindependence; return true; @@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // SrcUM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(SrcLoop, Delta->getType())) { - SrcUM = UpperBound->getValue()->getValue(); + SrcUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); SrcUMvalid = true; } @@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(DstLoop, Delta->getType())) { - DstUM = UpperBound->getValue()->getValue(); + DstUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); DstUMvalid = true; } @@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEVConstant *ConstOp = getConstantPart(Product); if (!ConstOp) return false; - APInt ConstOpValue = ConstOp->getValue()->getValue(); + APInt ConstOpValue = ConstOp->getAPInt(); ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOpValue.abs()); } @@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, } if (!Constant) return false; - APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); + APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); if (ConstDelta == 0) return false; @@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); else Constant = cast<SCEVConstant>(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = 
APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); else Constant = cast<SCEVConstant>(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, // or constant, in which case we give up on this direction. continue; } - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); if (RunningGCD != 0) { @@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, // If the difference is 0, we won't need to know the number of iterations. if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) Bound[K].Lower[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) Bound[K].Upper[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); } } @@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); Bound[K].Lower[Dependence::DVEntry::LT] = @@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 
if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); Bound[K].Lower[Dependence::DVEntry::GT] = @@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, // X^+ = max(X, 0) const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { - return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMaxExpr(X, SE->getZero(X->getType())); } // X^- = min(X, 0) const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { - return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMinExpr(X, SE->getZero(X->getType())); } @@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo * DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, const SCEV *&Constant) const { - const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + const SCEV *Zero = SE->getZero(Subscript->getType()); CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; for (unsigned K = 1; K <= MaxLevels; ++K) { CI[K].Coeff = Zero; @@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); if (!AddRec) - return SE->getConstant(Expr->getType(), 0); + return SE->getZero(Expr->getType()); if (AddRec->getLoop() == TargetLoop) return AddRec->getStepRecurrence(*SE); return findCoefficient(AddRec->getStart(), TargetLoop); @@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); if (!Bconst || !Cconst) return false; - APInt Beta = Bconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Beta = Bconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivB = Charlie.sdiv(Beta); assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); const SCEV *AP_K = findCoefficient(Dst, CurLoop); @@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this 
function flattens the nested recurrences into separate recurrences /// for each loop level. -bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, - const SCEV *DstSCEV, - SmallVectorImpl<Subscript> &Pair, - const SCEV *ElementSize) { +bool DependenceAnalysis::tryDelinearize(Instruction *Src, + Instruction *Dst, + SmallVectorImpl<Subscript> &Pair) +{ + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); + + Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + + // The code below mimics the code in Delinearization.cpp + const SCEV *SrcAccessFn = + SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = + SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = - dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); + dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); const SCEVUnknown *DstBase = - dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); + dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); if (!SrcBase || !DstBase || SrcBase != DstBase) return false; - SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); - DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEV *ElementSize = SE->getElementSize(Src); + if (ElementSize != SE->getElementSize(Dst)) + return false; + + const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); + const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); @@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) { } #endif - // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. @@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinearized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinearized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { @@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, return nullptr; } - auto Final = make_unique<FullDependence>(Result); - Result.DV = nullptr; - return std::move(Final); + return make_unique<FullDependence>(std::move(Result)); } @@ -3852,10 +3863,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinearized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinearized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp index e5ee295..5ae6d74 100644 --- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ //
//===----------------------------------------------------------------------===// // -// This file defines divergence analysis which determines whether a branch in a -// GPU program is divergent. It can help branch optimizations such as jump +// This file implements divergence analysis which determines whether a branch +// in a GPU program is divergent. It can help branch optimizations such as jump // threading and loop unswitching to make better decisions. // // GPU programs typically use the SIMD execution model, where multiple threads @@ -61,75 +61,31 @@ // 2. memory as black box. It conservatively considers values loaded from // generic or local address as divergent. This can be improved by leveraging // pointer analysis. +// //===----------------------------------------------------------------------===// -#include <vector> -#include "llvm/IR/Dominators.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include <vector> using namespace llvm; -#define DEBUG_TYPE "divergence" - -namespace { -class DivergenceAnalysis : public FunctionPass { -public: - static char ID; - - DivergenceAnalysis() : FunctionPass(ID) { - initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<PostDominatorTree>(); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F) override; - - // Print all divergent branches in the function. - void print(raw_ostream &OS, const Module *) const override; - - // Returns true if V is divergent. - bool isDivergent(const Value *V) const { return DivergentValues.count(V); } - // Returns true if V is uniform/non-divergent. - bool isUniform(const Value *V) const { return !isDivergent(V); } - -private: - // Stores all divergent values. - DenseSet<const Value *> DivergentValues; -}; -} // End of anonymous namespace - -// Register this pass. -char DivergenceAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", - false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) -INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", - false, true) - namespace { class DivergencePropagator { public: - DivergencePropagator(Function &F, TargetTransformInfo &TTI, - DominatorTree &DT, PostDominatorTree &PDT, - DenseSet<const Value *> &DV) + DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, + PostDominatorTree &PDT, DenseSet<const Value *> &DV) : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -140,7 +96,7 @@ private: // A helper function that explores sync dependents of TI. void exploreSyncDependency(TerminatorInst *TI); // Computes the influence region from Start to End. This region includes all - // basic blocks on any path from Start to End. + // basic blocks on any simple path from Start to End.
void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, DenseSet<BasicBlock *> &InfluenceRegion); // Finds all users of I that are outside the influence region, and add these @@ -153,13 +109,13 @@ private: DominatorTree &DT; PostDominatorTree &PDT; std::vector<Value *> Worklist; // Stack for DFS. - DenseSet<const Value *> &DV; // Stores all divergent values. + DenseSet<const Value *> &DV; // Stores all divergent values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); DV.insert(&I); @@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { // A PHINode is uniform if it returns the same value no matter which path is // taken. - if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second) - Worklist.push_back(I); + if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second) + Worklist.push_back(&*I); } // Propagation rule 2: if a value defined in a loop is used outside, the user @@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion( } } +// A helper function for computeInfluenceRegion that adds successors of "ThisBB" +// to the influence region. +static void +addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion, + std::vector<BasicBlock *> &InfluenceStack) { + for (BasicBlock *Succ : successors(ThisBB)) { + if (Succ != End && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } +} + void DivergencePropagator::computeInfluenceRegion( BasicBlock *Start, BasicBlock *End, DenseSet<BasicBlock *> &InfluenceRegion) { assert(PDT.properlyDominates(End, Start) && "End does not properly dominate Start"); + + // The influence region starts from the end of "Start" to the beginning of + // "End". Therefore, "Start" should not be in the region unless "Start" is in + // a loop that doesn't contain "End". std::vector<BasicBlock *> InfluenceStack; - InfluenceStack.push_back(Start); - InfluenceRegion.insert(Start); + addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); while (!InfluenceStack.empty()) { BasicBlock *BB = InfluenceStack.back(); InfluenceStack.pop_back(); - for (BasicBlock *Succ : successors(BB)) { - if (End != Succ && InfluenceRegion.insert(Succ).second) - InfluenceStack.push_back(Succ); - } + addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); } } @@ -286,10 +254,25 @@ void DivergencePropagator::propagate() { } /// end namespace anonymous +// Register this pass. 
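// --- Illustrative aside, not part of this patch ---
// With the pass class now declared in DivergenceAnalysis.h (see the deleted
// anonymous-namespace class above), the registration boilerplate moves here.
// The "divergence" string in the INITIALIZE_PASS macros below is the
// command-line name, so with the legacy pass manager the analysis can
// presumably be printed as, e.g.:
//   opt -divergence -analyze kernel.ll
// (assumes a target whose TTI reports sources of divergence, e.g. NVPTX).
// --- end aside ---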
+char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + FunctionPass *llvm::createDivergenceAnalysisPass() { return new DivergenceAnalysis(); } +void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.setPreservesAll(); +} + bool DivergenceAnalysis::runOnFunction(Function &F) { auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); if (TTIWP == nullptr) @@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.count(&Arg)) OS << "DIVERGENT: " << Arg << "\n"; } - // Iterate instructions using inst_range to ensure a deterministic order. - for (auto &I : inst_range(F)) { + // Iterate instructions using instructions() to ensure a deterministic order. + for (auto &I : instructions(F)) { if (DivergentValues.count(&I)) OS << "DIVERGENT:" << I << "\n"; } diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp new file mode 100644 index 0000000..01be8b3 --- /dev/null +++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp @@ -0,0 +1,106 @@ +//===- EHPersonalities.cpp - Compute EH-related information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// See if the given exception handling personality function is one that we +/// understand. If so, return a description of it; otherwise return Unknown. +EHPersonality llvm::classifyEHPersonality(const Value *Pers) { + const Function *F = + Pers ? dyn_cast<Function>(Pers->stripPointerCasts()) : nullptr; + if (!F) + return EHPersonality::Unknown; + return StringSwitch<EHPersonality>(F->getName()) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Default(EHPersonality::Unknown); +} + +bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { + EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); + // We can't simplify any invokes to nounwind functions if the personality + // function wants to catch asynch exceptions. The nounwind attribute only + // implies that the function does not throw synchronous exceptions. 
+ return !isAsynchronousEHPersonality(Personality); +} + +DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) { + SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> Worklist; + BasicBlock *EntryBlock = &F.getEntryBlock(); + DenseMap<BasicBlock *, ColorVector> BlockColors; + + // Build up the color map, which maps each block to its set of 'colors'. + // For any block B the "colors" of B are the set of funclets F (possibly + // including a root "funclet" representing the main function) such that + // F will need to directly contain B or a copy of B (where the term "directly + // contain" is used to distinguish from being "transitively contained" in + // a nested funclet). + // + // Note: Despite not being a funclet in the truest sense, a catchswitch is + // considered to belong to its own funclet for the purposes of coloring. + + DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for " + << F.getName() << "\n"); + + Worklist.push_back({EntryBlock, EntryBlock}); + + while (!Worklist.empty()) { + BasicBlock *Visiting; + BasicBlock *Color; + std::tie(Visiting, Color) = Worklist.pop_back_val(); + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << "Visiting " << Visiting->getName() << ", " + << Color->getName() << "\n"); + Instruction *VisitingHead = Visiting->getFirstNonPHI(); + if (VisitingHead->isEHPad()) { + // Mark this funclet head as a member of itself. + Color = Visiting; + } + // Note that this is a member of the given color. + ColorVector &Colors = BlockColors[Visiting]; + if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end()) + Colors.push_back(Color); + else + continue; + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Assigned color \'" << Color->getName() + << "\' to block \'" << Visiting->getName() + << "\'.\n"); + + BasicBlock *SuccColor = Color; + TerminatorInst *Terminator = Visiting->getTerminator(); + if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) { + Value *ParentPad = CatchRet->getParentPad(); + if (isa<ConstantTokenNone>(ParentPad)) + SuccColor = EntryBlock; + else + SuccColor = cast<Instruction>(ParentPad)->getParent(); + } + + for (BasicBlock *Succ : successors(Visiting)) + Worklist.push_back({Succ, SuccColor}); + } + return BlockColors; +} diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp new file mode 100644 index 0000000..ab2263a --- /dev/null +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -0,0 +1,1002 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +#define DEBUG_TYPE "globalsmodref-aa" + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +// An option to enable unsafe alias results from the GlobalsModRef analysis. +// When enabled, GlobalsModRef will provide no-alias results which in extremely +// rare cases may not be conservatively correct. In particular, in the face of +// transforms which cause asymmetry between how effective GetUnderlyingObject +// is for two pointers, it may produce incorrect results. +// +// These unsafe results have been returned by GMR for many years without +// causing significant issues in the wild and so we provide a mechanism to +// re-enable them for users of LLVM that have a particular performance +// sensitivity and no known issues. The option also makes it easy to evaluate +// the performance impact of these results. +static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults( + "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden); + +/// The mod/ref information collected for a particular function. +/// +/// We collect information about mod/ref behavior of a function here, both in +/// general and as pertains to specific globals. We only have this detailed +/// information when we know *something* useful about the behavior. If we +/// saturate to fully general mod/ref, we remove the info for the function. +class GlobalsAAResult::FunctionInfo { + typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType; + + /// Build a wrapper struct that has 8-byte alignment. All heap allocations + /// should provide this much alignment at least, but this makes it clear we + /// specifically rely on this amount of alignment. + struct LLVM_ALIGNAS(8) AlignedMap { + AlignedMap() {} + AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} + GlobalInfoMapType Map; + }; + + /// Pointer traits for our aligned map. + struct AlignedMapPointerTraits { + static inline void *getAsVoidPointer(AlignedMap *P) { return P; } + static inline AlignedMap *getFromVoidPointer(void *P) { + return (AlignedMap *)P; + } + enum { NumLowBitsAvailable = 3 }; + static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable), + "AlignedMap insufficiently aligned to have enough low bits."); + }; + + /// The bit that flags that this function may read any global. This is + /// chosen to mix together with ModRefInfo bits. + enum { MayReadAnyGlobal = 4 }; + + /// Checks to document the invariants of the bit packing here.
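// --- Illustrative aside, not part of this patch ---
// The packing verified by the static_asserts below: two ModRefInfo bits plus
// the MayReadAnyGlobal flag occupy the three alignment-guaranteed-zero low
// bits of the AlignedMap pointer. The flag arithmetic in isolation
// (hypothetical helper name; enum values match those above and in
// llvm/Analysis/AliasAnalysis.h):
#include "llvm/Analysis/AliasAnalysis.h"
static unsigned packState(llvm::ModRefInfo MRI, bool MayReadAnyGlobalFlag) {
  // MRI_NoModRef=0, MRI_Ref=1, MRI_Mod=2, MRI_ModRef=3; MayReadAnyGlobal=4.
  return unsigned(MRI) | (MayReadAnyGlobalFlag ? 4u : 0u);
}
// --- end aside ---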
+ static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, + "ModRef and the MayReadAnyGlobal flag bits overlap."); + static_assert(((MayReadAnyGlobal | MRI_ModRef) >> + AlignedMapPointerTraits::NumLowBitsAvailable) == 0, + "Insufficient low bits to store our flag and ModRef info."); + +public: + FunctionInfo() : Info() {} + ~FunctionInfo() { + delete Info.getPointer(); + } + // Spell out the copy ond move constructors and assignment operators to get + // deep copy semantics and correct move semantics in the face of the + // pointer-int pair. + FunctionInfo(const FunctionInfo &Arg) + : Info(nullptr, Arg.Info.getInt()) { + if (const auto *ArgPtr = Arg.Info.getPointer()) + Info.setPointer(new AlignedMap(*ArgPtr)); + } + FunctionInfo(FunctionInfo &&Arg) + : Info(Arg.Info.getPointer(), Arg.Info.getInt()) { + Arg.Info.setPointerAndInt(nullptr, 0); + } + FunctionInfo &operator=(const FunctionInfo &RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(nullptr, RHS.Info.getInt()); + if (const auto *RHSPtr = RHS.Info.getPointer()) + Info.setPointer(new AlignedMap(*RHSPtr)); + return *this; + } + FunctionInfo &operator=(FunctionInfo &&RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt()); + RHS.Info.setPointerAndInt(nullptr, 0); + return *this; + } + + /// Returns the \c ModRefInfo info for this function. + ModRefInfo getModRefInfo() const { + return ModRefInfo(Info.getInt() & MRI_ModRef); + } + + /// Adds new \c ModRefInfo for this function to its state. + void addModRefInfo(ModRefInfo NewMRI) { + Info.setInt(Info.getInt() | NewMRI); + } + + /// Returns whether this function may read any global variable, and we don't + /// know which global. + bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; } + + /// Sets this function as potentially reading from any global. + void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); } + + /// Returns the \c ModRefInfo info for this function w.r.t. a particular + /// global, which may be more precise than the general information above. + ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { + ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; + if (AlignedMap *P = Info.getPointer()) { + auto I = P->Map.find(&GV); + if (I != P->Map.end()) + GlobalMRI = ModRefInfo(GlobalMRI | I->second); + } + return GlobalMRI; + } + + /// Add mod/ref info from another function into ours, saturating towards + /// MRI_ModRef. + void addFunctionInfo(const FunctionInfo &FI) { + addModRefInfo(FI.getModRefInfo()); + + if (FI.mayReadAnyGlobal()) + setMayReadAnyGlobal(); + + if (AlignedMap *P = FI.Info.getPointer()) + for (const auto &G : P->Map) + addModRefInfoForGlobal(*G.first, G.second); + } + + void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) { + AlignedMap *P = Info.getPointer(); + if (!P) { + P = new AlignedMap(); + Info.setPointer(P); + } + auto &GlobalMRI = P->Map[&GV]; + GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); + } + + /// Clear a global's ModRef info. Should be used when a global is being + /// deleted. + void eraseModRefInfoForGlobal(const GlobalValue &GV) { + if (AlignedMap *P = Info.getPointer()) + P->Map.erase(&GV); + } + +private: + /// All of the information is encoded into a single pointer, with a three bit + /// integer in the low three bits. The high bit provides a flag for when this + /// function may read any global. The low two bits are the ModRefInfo. 
And + /// the pointer, when non-null, points to a map from GlobalValue to + /// ModRefInfo specific to that GlobalValue. + PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info; +}; + +void GlobalsAAResult::DeletionCallbackHandle::deleted() { + Value *V = getValPtr(); + if (auto *F = dyn_cast<Function>(V)) + GAR->FunctionInfos.erase(F); + + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (GAR->NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and + // remove any AllocRelatedValues for it. + if (GAR->IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (auto I = GAR->AllocsForIndirectGlobals.begin(), + E = GAR->AllocsForIndirectGlobals.end(); + I != E; ++I) + if (I->second == GV) + GAR->AllocsForIndirectGlobals.erase(I); + } + + // Scan the function info we have collected and remove this global + // from all of them. + for (auto &FIPair : GAR->FunctionInfos) + FIPair.second.eraseModRefInfoForGlobal(*GV); + } + } + + // If this is an allocation related to an indirect global, remove it. + GAR->AllocsForIndirectGlobals.erase(V); + + // And clear out the handle. + setValPtr(nullptr); + GAR->Handles.erase(I); + // This object is now destroyed! +} + +FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) { + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; + + if (FunctionInfo *FI = getFunctionInfo(F)) { + if (FI->getModRefInfo() == MRI_NoModRef) + Min = FMRB_DoesNotAccessMemory; + else if ((FI->getModRefInfo() & MRI_Mod) == 0) + Min = FMRB_OnlyReadsMemory; + } + + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); +} + +FunctionModRefBehavior +GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) { + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; + + if (const Function *F = CS.getCalledFunction()) + if (FunctionInfo *FI = getFunctionInfo(F)) { + if (FI->getModRefInfo() == MRI_NoModRef) + Min = FMRB_DoesNotAccessMemory; + else if ((FI->getModRefInfo() & MRI_Mod) == 0) + Min = FMRB_OnlyReadsMemory; + } + + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); +} + +/// Returns the function info for the function, or null if we don't have +/// anything useful to say about it. +GlobalsAAResult::FunctionInfo * +GlobalsAAResult::getFunctionInfo(const Function *F) { + auto I = FunctionInfos.find(F); + if (I != FunctionInfos.end()) + return &I->second; + return nullptr; +} + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program. If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. +void GlobalsAAResult::AnalyzeGlobals(Module &M) { + SmallPtrSet<Function *, 64> TrackedFunctions; + for (Function &F : M) + if (F.hasLocalLinkage()) + if (!AnalyzeUsesOfPointer(&F)) { + // Remember that we are tracking this global. + NonAddressTakenGlobals.insert(&F); + TrackedFunctions.insert(&F); + Handles.emplace_front(*this, &F); + Handles.front().I = Handles.begin(); + ++NumNonAddrTakenFunctions; + } + + SmallPtrSet<Function *, 64> Readers, Writers; + for (GlobalVariable &GV : M.globals()) + if (GV.hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(&GV, &Readers, + GV.isConstant() ? 
nullptr : &Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(&GV); + Handles.emplace_front(*this, &GV); + Handles.front().I = Handles.begin(); + + for (Function *Reader : Readers) { + if (TrackedFunctions.insert(Reader).second) { + Handles.emplace_front(*this, Reader); + Handles.front().I = Handles.begin(); + } + FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref); + } + + if (!GV.isConstant()) // No need to keep track of writers to constants + for (Function *Writer : Writers) { + if (TrackedFunctions.insert(Writer).second) { + Handles.emplace_front(*this, Writer); + Handles.front().I = Handles.begin(); + } + FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod); + } + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (GV.getType()->getElementType()->isPointerTy() && + AnalyzeIndirectGlobalMemory(&GV)) + ++NumIndirectGlobalVars; + } + Readers.clear(); + Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. +bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, + SmallPtrSetImpl<Function *> *Readers, + SmallPtrSetImpl<Function *> *Writers, + GlobalValue *OkayStoreDest) { + if (!V->getType()->isPointerTy()) + return true; + + for (Use &U : V->uses()) { + User *I = U.getUser(); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (Readers) + Readers->insert(LI->getParent()->getParent()); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (V == SI->getOperand(1)) { + if (Writers) + Writers->insert(SI->getParent()->getParent()); + } else if (SI->getOperand(1) != OkayStoreDest) { + return true; // Storing the pointer + } + } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { + if (AnalyzeUsesOfPointer(I, Readers, Writers)) + return true; + } else if (Operator::getOpcode(I) == Instruction::BitCast) { + if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) + return true; + } else if (auto CS = CallSite(I)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + if (CS.isDataOperand(&U)) { + // Detect calls to free. + if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) { + if (Writers) + Writers->insert(CS->getParent()->getParent()); + } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) { + Function *ParentF = CS->getParent()->getParent(); + // A nocapture argument may be read from or written to, but does not + // escape unless the call can somehow recurse. + // + // nocapture "indicates that the callee does not make any copies of + // the pointer that outlive itself". Therefore if we directly or + // indirectly recurse, we must treat the pointer as escaping. + if (FunctionToSCCMap[ParentF] == + FunctionToSCCMap[CS.getCalledFunction()]) + return true; + if (Readers) + Readers->insert(ParentF); + if (Writers) + Writers->insert(ParentF); + } else { + return true; // Argument of an unknown call. + } + // If the Callee is not ReadNone, it may read the global, + // and if it is not ReadOnly, it may also write to it. 
+        Function *CalleeF = CS.getCalledFunction();
+        if (!CalleeF->doesNotAccessMemory()) {
+          if (Readers)
+            Readers->insert(CalleeF);
+          if (Writers && !CalleeF->onlyReadsMemory())
+            Writers->insert(CalleeF);
+        }
+      }
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return true; // Allow comparison against null.
+    } else {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+  // Keep track of values related to the allocation of the memory, e.g. the
+  // value produced by the malloc call and any casts.
+  std::vector<Value *> AllocRelatedValues;
+
+  // If the initializer is a valid pointer, bail.
+  if (Constant *C = GV->getInitializer())
+    if (!C->isNullValue())
+      return false;
+
+  // Walk the user list of the global. If we find anything other than a direct
+  // load or store, bail out.
+  for (User *U : GV->users()) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // The pointer loaded from the global can only be used in simple ways:
+      // we allow addressing of it and loading and storing to it. We do *not*
+      // allow storing the loaded pointer somewhere else or passing to a
+      // function.
+      if (AnalyzeUsesOfPointer(LI))
+        return false; // Loaded pointer escapes.
+      // TODO: Could try some IP mod/ref of the loaded pointer.
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // Storing the global itself.
+      if (SI->getOperand(0) == GV)
+        return false;
+
+      // If storing the null pointer, ignore it.
+      if (isa<ConstantPointerNull>(SI->getOperand(0)))
+        continue;
+
+      // Check the value being stored.
+      Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+                                       GV->getParent()->getDataLayout());
+
+      if (!isAllocLikeFn(Ptr, &TLI))
+        return false; // Too hard to analyze.
+
+      // Analyze all uses of the allocation. If any of them are used in a
+      // non-simple way (e.g. stored to another global) bail out.
+      if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+                               GV))
+        return false; // Loaded pointer escapes.
+
+      // Remember that this allocation is related to the indirect global.
+      AllocRelatedValues.push_back(Ptr);
+    } else {
+      // Something complex, bail out.
+      return false;
+    }
+  }
+
+  // Okay, this is an indirect global. Remember all of the allocations for
+  // this global in AllocsForIndirectGlobals.
+  while (!AllocRelatedValues.empty()) {
+    AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+    Handles.emplace_front(*this, AllocRelatedValues.back());
+    Handles.front().I = Handles.begin();
+    AllocRelatedValues.pop_back();
+  }
+  IndirectGlobals.insert(GV);
+  Handles.emplace_front(*this, GV);
+  Handles.front().I = Handles.begin();
+  return true;
+}
+
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
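
// ---- Illustrative aside (editorial, not part of this commit) ----
// A stand-alone model of the use classification AnalyzeUsesOfPointer
// performs above; all names here are invented for illustration and are
// not LLVM API.
#include <set>
#include <string>
#include <vector>

enum class UseKind { Load, StoreTo, StoreOf, NocaptureCallSameSCC, Other };

struct PointerUse {
  UseKind Kind;
  std::string Fn; // function containing the use
};

// Mirrors the contract above: returns true as soon as any use lets the
// pointer escape, otherwise records which functions read or write it.
static bool usesEscape(const std::vector<PointerUse> &Uses,
                       std::set<std::string> &Readers,
                       std::set<std::string> &Writers) {
  for (const PointerUse &U : Uses) {
    switch (U.Kind) {
    case UseKind::Load:                 // reading through the pointer
      Readers.insert(U.Fn);
      break;
    case UseKind::StoreTo:              // pointer is the store destination
      Writers.insert(U.Fn);
      break;
    case UseKind::StoreOf:              // the pointer itself is stored away
      return true;
    case UseKind::NocaptureCallSameSCC: // recursion defeats nocapture
      return true;
    case UseKind::Other:                // anything unrecognized escapes
      return true;
    }
  }
  return false;
}
// The SCC ids computed below are exactly what makes the
// NocaptureCallSameSCC test above answerable in constant time.
// ---- end aside ----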
+ unsigned SCCID = 0; + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + const std::vector<CallGraphNode *> &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + for (auto *CGN : SCC) + if (Function *F = CGN->getFunction()) + FunctionToSCCMap[F] = SCCID; + ++SCCID; + } +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + const std::vector<CallGraphNode *> &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) { + // Calls externally or is weak - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). + for (auto *Node : SCC) + FunctionInfos.erase(Node->getFunction()); + continue; + } + + FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()]; + bool KnowNothing = false; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. + for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FI.addModRefInfo(MRI_Ref); + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FI.setMayReadAnyGlobal(); + } else if (F->onlyAccessesArgMemory() || + F->onlyAccessesInaccessibleMemOrArgMem()) { + // This function may only access (read/write) memory pointed to by its + // arguments. If this pointer is to a global, this escaping use of the + // pointer is captured in AnalyzeUsesOfPointer(). + FI.addModRefInfo(MRI_ModRef); + } else { + FI.addModRefInfo(MRI_ModRef); + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. + KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) { + // Propagate function effect up. + FI.addFunctionInfo(*CalleeFI); + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfos map. + if (KnowNothing) { + for (auto *Node : SCC) + FunctionInfos.erase(Node->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. 
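
// ---- Illustrative aside (editorial, not part of this commit) ----
// The declaration handling above collapses function attributes onto the
// two mod/ref bits before the body scan below ORs in per-instruction
// effects. A stand-alone model of that lattice, with invented names:
#include <cassert>

enum Effect : unsigned { None = 0, Ref = 1, Mod = 2, Both = 3 };

struct DeclAttrs {
  bool DoesNotAccessMemory; // readnone
  bool OnlyReadsMemory;     // readonly
};

static Effect effectFromAttrs(DeclAttrs A) {
  if (A.DoesNotAccessMemory)
    return None; // contributes nothing to the caller's effect
  if (A.OnlyReadsMemory)
    return Ref;  // may add Ref, never Mod
  return Both;   // unknown declaration: saturate the lattice
}

int main() {
  // Merging callee effects is a bitwise OR that saturates at Both, just
  // as addModRefInfo saturates at MRI_ModRef.
  unsigned FnEffect = effectFromAttrs({false, true}) |  // readonly callee
                      effectFromAttrs({false, false});  // unknown callee
  assert(FnEffect == Both);
  assert(effectFromAttrs({true, false}) == None);
  return 0;
}
// ---- end aside ----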
+    for (auto *Node : SCC) {
+      if (FI.getModRefInfo() == MRI_ModRef)
+        break; // The mod/ref lattice saturates here.
+      for (Instruction &I : instructions(Node->getFunction())) {
+        if (FI.getModRefInfo() == MRI_ModRef)
+          break; // The mod/ref lattice saturates here.
+
+        // We handle calls specially because the graph-relevant aspects are
+        // handled above.
+        if (auto CS = CallSite(&I)) {
+          if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+            // FIXME: It is completely unclear why this is necessary and not
+            // handled by the above graph code.
+            FI.addModRefInfo(MRI_ModRef);
+          } else if (Function *Callee = CS.getCalledFunction()) {
+            // The callgraph doesn't include intrinsic calls.
+            if (Callee->isIntrinsic()) {
+              FunctionModRefBehavior Behaviour =
+                  AAResultBase::getModRefBehavior(Callee);
+              FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+            }
+          }
+          continue;
+        }
+
+        // For all non-call instructions we use the primary predicates for
+        // whether they read or write memory.
+        if (I.mayReadFromMemory())
+          FI.addModRefInfo(MRI_Ref);
+        if (I.mayWriteToMemory())
+          FI.addModRefInfo(MRI_Mod);
+      }
+    }
+
+    if ((FI.getModRefInfo() & MRI_Mod) == 0)
+      ++NumReadMemFunctions;
+    if (FI.getModRefInfo() == MRI_NoModRef)
+      ++NumNoMemFunctions;
+
+    // Finally, now that we know the full effect on this SCC, clone the
+    // information to each function in the SCC.
+    // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+    // get invalidated if DenseMap decides to re-hash.
+    FunctionInfo CachedFI = FI;
+    for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+      FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+  }
+}
+
+// GV is a non-escaping global. V is a pointer address that has been loaded from.
+// If we can prove that V must escape, we can conclude that a load from V cannot
+// alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+                                               const Value *V,
+                                               int &Depth,
+                                               const DataLayout &DL) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
+        isa<InvokeInst>(Input))
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      //
+      // (Transitive) loads from a global are also safe - if this aliased
+      // another global, its address would escape, so no alias.
+      continue;
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    return false;
+  } while (!Inputs.empty());
+
+  // All inputs were known to be no-alias.
+  return true;
+}
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would have
+// changed program behavior by introducing the observed escape, making it an
+// invalid transform.
+//
+// This property does require that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, cannot rely
+// on the results of GMR::alias. This seems a reasonable restriction, although
+// currently there is no way to enforce it. There is also no realistic
+// optimization pass that would make this mistake. The closest example is
+// a transformation pass which does reg2mem of SSA values but stores them into
+// global variables temporarily before restoring the global variable's value.
+// This could be useful to expose "benign" races for example. However, it seems
+// reasonable to require that a pass which introduces escapes of global
+// variables in this way either not trust AA results while the escape is
+// active, or be forced to operate as a module pass that cannot co-exist
+// with an alias analysis such as GMR.
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+                                                 const Value *V) {
+  // In order to know that the underlying object cannot alias the
+  // non-addr-taken global, we must know that it would have to be an escape.
+  // Thus if the underlying object is a function argument, a load from
+  // a global, or the return of a function, it cannot alias. We can also
+  // recurse through PHI nodes and select nodes provided all of their inputs
+  // resolve to one of these known-escaping roots.
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  int Depth = 0;
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+      // If one input is the very global we're querying against, then we can't
+      // conclude no-alias.
+      if (InputGV == GV)
+        return false;
+
+      // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+      // FIXME: The condition can be refined, but be conservative for now.
+      auto *GVar = dyn_cast<GlobalVariable>(GV);
+      auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+      if (GVar && InputGVar &&
+          !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+          !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+        Type *GVType = GVar->getInitializer()->getType();
+        Type *InputGVType = InputGVar->getInitializer()->getType();
+        if (GVType->isSized() && InputGVType->isSized() &&
+            (DL.getTypeAllocSize(GVType) > 0) &&
+            (DL.getTypeAllocSize(InputGVType) > 0))
+          continue;
+      }
+
+      // Conservatively return false, even though we could be smarter
+      // (e.g. look through GlobalAliases).
+      return false;
+    }
+
+    if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+        isa<InvokeInst>(Input)) {
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      continue;
+    }
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      // A pointer loaded from a global would have been captured, and we know
+      // that the global is non-escaping, so no alias.
+      const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+      if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+        // The load does not alias with GV.
+        continue;
+      // Otherwise, a load could come from anywhere, so bail.
+      return false;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    // FIXME: It would be good to handle other obvious no-alias cases here, but
+    // it isn't clear how to do so reasonably without building a small version
+    // of BasicAA into this code. We could recurse into AAResultBase::alias
+    // here but that seems likely to go poorly as we're inside the
+    // implementation of such a query. Until then, just conservatively return
+    // false.
+    return false;
+  } while (!Inputs.empty());
+
+  // If all the inputs to V were definitively no-alias, then V is no-alias.
+  return true;
+}
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+                                   const MemoryLocation &LocB) {
+  // Get the base object these pointers point to.
+  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+  // If either of the underlying values is a global, they may be non-addr-taken
+  // globals, which we can answer queries about.
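
// ---- Illustrative aside (editorial, not part of this commit) ----
// What the no-alias answer below buys at the source level. Both globals
// here have internal linkage and no escaping uses, so both are tracked as
// non-addr-taken, and two distinct tracked globals get NoAlias:
namespace {
int Counter; // internal linkage, address never escapes
int Limit;   // likewise
}

int step() {
  // With GlobalsModRef, the load of Limit need not be repeated after the
  // store to Counter, since the two cannot alias.
  Counter += 1;
  return Limit - Counter;
}
// ---- end aside ----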
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); + const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) + GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) + GV2 = nullptr; + + // If the two pointers are derived from two different non-addr-taken + // globals we know these can't alias. + if (GV1 && GV2 && GV1 != GV2) + return NoAlias; + + // If one is and the other isn't, it isn't strictly safe but we can fake + // this result if necessary for performance. This does not appear to be + // a common problem in practice. + if (EnableUnsafeGlobalsModRefAliasResults) + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Check for a special case where a non-escaping global can be used to + // conclude no-alias. + if ((GV1 || GV2) && GV1 != GV2) { + const GlobalValue *GV = GV1 ? GV1 : GV2; + const Value *UV = GV1 ? UV2 : UV1; + if (isNonEscapingGlobalNoAlias(GV, UV)) + return NoAlias; + } + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. + GV1 = GV2 = nullptr; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV1 = GV; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV2 = GV; + + // These pointers may also be from an allocation for the indirect global. If + // so, also handle them. + if (!GV1) + GV1 = AllocsForIndirectGlobals.lookup(UV1); + if (!GV2) + GV2 = AllocsForIndirectGlobals.lookup(UV2); + + // Now that we know whether the two pointers are related to indirect globals, + // use this to disambiguate the pointers. If the pointers are based on + // different indirect globals they cannot alias. + if (GV1 && GV2 && GV1 != GV2) + return NoAlias; + + // If one is based on an indirect global and the other isn't, it isn't + // strictly safe but we can fake this result if necessary for performance. + // This does not appear to be a common problem in practice. + if (EnableUnsafeGlobalsModRefAliasResults) + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + return AAResultBase::alias(LocA, LocB); +} + +ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, + const GlobalValue *GV) { + if (CS.doesNotAccessMemory()) + return MRI_NoModRef; + ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef; + + // Iterate through all the arguments to the called function. If any argument + // is based on GV, return the conservative result. + for (auto &A : CS.args()) { + SmallVector<Value*, 4> Objects; + GetUnderlyingObjects(A, Objects, DL); + + // All objects must be identified. + if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject)) + return ConservativeResult; + + if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end()) + return ConservativeResult; + } + + // We identified all objects in the argument list, and none of them were GV. 
+ return MRI_NoModRef; +} + +ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + unsigned Known = MRI_ModRef; + + // If we are asking for mod/ref info of a direct call with a pointer to a + // global we are tracking, return information if we have it. + if (const GlobalValue *GV = + dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) + if (GV->hasLocalLinkage()) + if (const Function *F = CS.getCalledFunction()) + if (NonAddressTakenGlobals.count(GV)) + if (const FunctionInfo *FI = getFunctionInfo(F)) + Known = FI->getModRefInfoForGlobal(*GV) | + getModRefInfoForArgument(CS, GV); + + if (Known == MRI_NoModRef) + return MRI_NoModRef; // No need to query other mod/ref analyses + return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc)); +} + +GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, + const TargetLibraryInfo &TLI) + : AAResultBase(TLI), DL(DL) {} + +GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), + NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), + IndirectGlobals(std::move(Arg.IndirectGlobals)), + AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), + FunctionInfos(std::move(Arg.FunctionInfos)), + Handles(std::move(Arg.Handles)) { + // Update the parent for each DeletionCallbackHandle. + for (auto &H : Handles) { + assert(H.GAR == &Arg); + H.GAR = this; + } +} + +/*static*/ GlobalsAAResult +GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, + CallGraph &CG) { + GlobalsAAResult Result(M.getDataLayout(), TLI); + + // Discover which functions aren't recursive, to feed into AnalyzeGlobals. + Result.CollectSCCMembership(CG); + + // Find non-addr taken globals. + Result.AnalyzeGlobals(M); + + // Propagate on CG. 
+ Result.AnalyzeCallGraph(CG, M); + + return Result; +} + +GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) { + return GlobalsAAResult::analyzeModule(M, + AM->getResult<TargetLibraryAnalysis>(M), + AM->getResult<CallGraphAnalysis>(M)); +} + +char GlobalsAA::PassID; + +char GlobalsAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa", + "Globals Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa", + "Globals Alias Analysis", false, true) + +ModulePass *llvm::createGlobalsAAWrapperPass() { + return new GlobalsAAWrapperPass(); +} + +GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { + initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool GlobalsAAWrapperPass::runOnModule(Module &M) { + Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( + M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + getAnalysis<CallGraphWrapperPass>().getCallGraph()))); + return false; +} + +bool GlobalsAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<CallGraphWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp deleted file mode 100644 index 28fb49c..0000000 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ /dev/null @@ -1,609 +0,0 @@ -//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass provides alias and mod/ref information for global values -// that do not have their address taken, and keeps track of whether functions -// read or write memory (are "pure"). For this simple (but very common) case, -// we can provide pretty accurate and useful information. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include <set> -using namespace llvm; - -#define DEBUG_TYPE "globalsmodref-aa" - -STATISTIC(NumNonAddrTakenGlobalVars, - "Number of global vars without address taken"); -STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); -STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); -STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); -STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); - -namespace { -/// FunctionRecord - One instance of this structure is stored for every -/// function in the program. 
Later, the entries for these functions are
-/// removed if the function is found to call an external function (in which
-/// case we know nothing about it).
-struct FunctionRecord {
-  /// GlobalInfo - Maintain mod/ref info for all of the globals without
-  /// addresses taken that are read or written (transitively) by this
-  /// function.
-  std::map<const GlobalValue *, unsigned> GlobalInfo;
-
-  /// MayReadAnyGlobal - May read global variables, but it is not known which.
-  bool MayReadAnyGlobal;
-
-  unsigned getInfoForGlobal(const GlobalValue *GV) const {
-    unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
-    std::map<const GlobalValue *, unsigned>::const_iterator I =
-        GlobalInfo.find(GV);
-    if (I != GlobalInfo.end())
-      Effect |= I->second;
-    return Effect;
-  }
-
-  /// FunctionEffect - Capture whether or not this function reads or writes to
-  /// ANY memory. If not, we can do a lot of aggressive analysis on it.
-  unsigned FunctionEffect;
-
-  FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
-};
-
-/// GlobalsModRef - The actual analysis pass.
-class GlobalsModRef : public ModulePass, public AliasAnalysis {
-  /// NonAddressTakenGlobals - The globals that do not have their addresses
-  /// taken.
-  std::set<const GlobalValue *> NonAddressTakenGlobals;
-
-  /// IndirectGlobals - The memory pointed to by this global is known to be
-  /// 'owned' by the global.
-  std::set<const GlobalValue *> IndirectGlobals;
-
-  /// AllocsForIndirectGlobals - If an instruction allocates memory for an
-  /// indirect global, this map indicates which one.
-  std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
-
-  /// FunctionInfo - For each function, keep track of what globals are
-  /// modified or read.
-  std::map<const Function *, FunctionRecord> FunctionInfo;
-
-public:
-  static char ID;
-  GlobalsModRef() : ModulePass(ID) {
-    initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnModule(Module &M) override {
-    InitializeAliasAnalysis(this, &M.getDataLayout());
-
-    // Find non-addr taken globals.
-    AnalyzeGlobals(M);
-
-    // Propagate on CG.
-    AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
-    return false;
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AliasAnalysis::getAnalysisUsage(AU);
-    AU.addRequired<CallGraphWrapperPass>();
-    AU.setPreservesAll(); // Does not transform code
-  }
-
-  //------------------------------------------------
-  // Implement the AliasAnalysis API
-  //
-  AliasResult alias(const MemoryLocation &LocA,
-                    const MemoryLocation &LocB) override;
-  ModRefResult getModRefInfo(ImmutableCallSite CS,
-                             const MemoryLocation &Loc) override;
-  ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                             ImmutableCallSite CS2) override {
-    return AliasAnalysis::getModRefInfo(CS1, CS2);
-  }
-
-  /// getModRefBehavior - Return the behavior of the specified function if
-  /// called from the specified call site. The call site may be null in which
-  /// case the most generic behavior of this function should be returned.
-  ModRefBehavior getModRefBehavior(const Function *F) override {
-    ModRefBehavior Min = UnknownModRefBehavior;
-
-    if (FunctionRecord *FR = getFunctionInfo(F)) {
-      if (FR->FunctionEffect == 0)
-        Min = DoesNotAccessMemory;
-      else if ((FR->FunctionEffect & Mod) == 0)
-        Min = OnlyReadsMemory;
-    }
-
-    return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
-  }
-
-  /// getModRefBehavior - Return the behavior of the specified function if
-  /// called from the specified call site.
The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function *F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } - - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } - - void deleteValue(Value *V) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - -private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map<const Function *, FunctionRecord>::iterator I = - FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } - - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers, - std::vector<Function *> &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); -}; -} - -char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) - -Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } - -/// AnalyzeGlobals - Scan through the users of all of the internal -/// GlobalValue's in the program. If none of them have their "address taken" -/// (really, their address passed to something nontrivial), record this fact, -/// and record the functions that they are used directly in. -void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector<Function *> Readers, Writers; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global. - NonAddressTakenGlobals.insert(I); - ++NumNonAddrTakenFunctions; - } - Readers.clear(); - Writers.clear(); - } - - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global, and the mod/ref fns - NonAddressTakenGlobals.insert(I); - - for (unsigned i = 0, e = Readers.size(); i != e; ++i) - FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - - if (!I->isConstant()) // No need to keep track of writers to constants - for (unsigned i = 0, e = Writers.size(); i != e; ++i) - FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; - ++NumNonAddrTakenGlobalVars; - - // If this global holds a pointer type, see if it is an indirect global. 
-        if (I->getType()->getElementType()->isPointerTy() &&
-            AnalyzeIndirectGlobalMemory(I))
-          ++NumIndirectGlobalVars;
-      }
-      Readers.clear();
-      Writers.clear();
-    }
-}
-
-/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
-/// If this is used by anything complex (i.e., the address escapes), return
-/// true. Also, while we are at it, keep track of those functions that read and
-/// write to the value.
-///
-/// If OkayStoreDest is non-null, stores into this global are allowed.
-bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
-                                         std::vector<Function *> &Readers,
-                                         std::vector<Function *> &Writers,
-                                         GlobalValue *OkayStoreDest) {
-  if (!V->getType()->isPointerTy())
-    return true;
-
-  for (Use &U : V->uses()) {
-    User *I = U.getUser();
-    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-      Readers.push_back(LI->getParent()->getParent());
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-      if (V == SI->getOperand(1)) {
-        Writers.push_back(SI->getParent()->getParent());
-      } else if (SI->getOperand(1) != OkayStoreDest) {
-        return true; // Storing the pointer
-      }
-    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
-      if (AnalyzeUsesOfPointer(I, Readers, Writers))
-        return true;
-    } else if (Operator::getOpcode(I) == Instruction::BitCast) {
-      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
-        return true;
-    } else if (auto CS = CallSite(I)) {
-      // Make sure that this is just the function being called, not that it is
-      // passing into the function.
-      if (!CS.isCallee(&U)) {
-        // Detect calls to free.
-        if (isFreeCall(I, TLI))
-          Writers.push_back(CS->getParent()->getParent());
-        else
-          return true; // Argument of an unknown call.
-      }
-    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
-      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
-        return true; // Allow comparison against null.
-    } else {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
-/// which holds a pointer type. See if the global always points to non-aliased
-/// heap memory: that is, all initializers of the globals are allocations, and
-/// those allocations have no use other than initialization of the global.
-/// Further, all loads out of GV must directly use the memory, not store the
-/// pointer somewhere. If this is true, we consider the memory pointed to by
-/// GV to be owned by GV and can disambiguate other pointers from it.
-bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
-  // Keep track of values related to the allocation of the memory, e.g. the
-  // value produced by the malloc call and any casts.
-  std::vector<Value *> AllocRelatedValues;
-
-  // Walk the user list of the global. If we find anything other than a direct
-  // load or store, bail out.
-  for (User *U : GV->users()) {
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      // The pointer loaded from the global can only be used in simple ways:
-      // we allow addressing of it and loading and storing to it. We do *not*
-      // allow storing the loaded pointer somewhere else or passing to a
-      // function.
-      std::vector<Function *> ReadersWriters;
-      if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
-        return false; // Loaded pointer escapes.
-      // TODO: Could try some IP mod/ref of the loaded pointer.
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      // Storing the global itself.
-      if (SI->getOperand(0) == GV)
-        return false;
-
-      // If storing the null pointer, ignore it.
- if (isa<ConstantPointerNull>(SI->getOperand(0))) - continue; - - // Check the value being stored. - Value *Ptr = GetUnderlyingObject(SI->getOperand(0), - GV->getParent()->getDataLayout()); - - if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. - - // Analyze all uses of the allocation. If any of them are used in a - // non-simple way (e.g. stored to another global) bail out. - std::vector<Function *> ReadersWriters; - if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. - - // Remember that this allocation is related to the indirect global. - AllocRelatedValues.push_back(Ptr); - } else { - // Something complex, bail out. - return false; - } - } - - // Okay, this is an indirect global. Remember all of the allocations for - // this global in AllocsForIndirectGlobals. - while (!AllocRelatedValues.empty()) { - AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; - AllocRelatedValues.pop_back(); - } - IndirectGlobals.insert(GV); - return true; -} - -/// AnalyzeCallGraph - At this point, we know the functions where globals are -/// immediately stored to and read from. Propagate this information up the call -/// graph to all callers and compute the mod/ref info for all memory for each -/// function. -void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { - // We do a bottom-up SCC traversal of the call graph. In other words, we - // visit all callees before callers (leaf-first). - for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { - const std::vector<CallGraphNode *> &SCC = *I; - assert(!SCC.empty() && "SCC with no functions?"); - - if (!SCC[0]->getFunction()) { - // Calls externally - can't say anything useful. Remove any existing - // function records (may have been created when scanning globals). - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - FunctionInfo.erase(SCC[i]->getFunction()); - continue; - } - - FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; - - bool KnowNothing = false; - unsigned FunctionEffect = 0; - - // Collect the mod/ref properties due to called functions. We only compute - // one mod-ref set. - for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); - if (!F) { - KnowNothing = true; - break; - } - - if (F->isDeclaration()) { - // Try to get mod/ref behaviour from function attributes. - if (F->doesNotAccessMemory()) { - // Can't do better than that! - } else if (F->onlyReadsMemory()) { - FunctionEffect |= Ref; - if (!F->isIntrinsic()) - // This function might call back into the module and read a global - - // consider every global as possibly being read by this function. - FR.MayReadAnyGlobal = true; - } else { - FunctionEffect |= ModRef; - // Can't say anything useful unless it's an intrinsic - they don't - // read or write global variables of the kind considered here. - KnowNothing = !F->isIntrinsic(); - } - continue; - } - - for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); - CI != E && !KnowNothing; ++CI) - if (Function *Callee = CI->second->getFunction()) { - if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { - // Propagate function effect up. - FunctionEffect |= CalleeFR->FunctionEffect; - - // Incorporate callee's effects on globals into our info. - for (const auto &G : CalleeFR->GlobalInfo) - FR.GlobalInfo[G.first] |= G.second; - FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; - } else { - // Can't say anything about it. 
However, if it is inside our SCC,
-            // then nothing needs to be done.
-            CallGraphNode *CalleeNode = CG[Callee];
-            if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
-              KnowNothing = true;
-          }
-        } else {
-          KnowNothing = true;
-        }
-    }
-
-    // If we can't say anything useful about this SCC, remove all SCC functions
-    // from the FunctionInfo map.
-    if (KnowNothing) {
-      for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-        FunctionInfo.erase(SCC[i]->getFunction());
-      continue;
-    }
-
-    // Scan the function bodies for explicit loads or stores.
-    for (auto *Node : SCC) {
-      if (FunctionEffect == ModRef)
-        break; // The mod/ref lattice saturates here.
-      for (Instruction &I : inst_range(Node->getFunction())) {
-        if (FunctionEffect == ModRef)
-          break; // The mod/ref lattice saturates here.
-
-        // We handle calls specially because the graph-relevant aspects are
-        // handled above.
-        if (auto CS = CallSite(&I)) {
-          if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) {
-            // FIXME: It is completely unclear why this is necessary and not
-            // handled by the above graph code.
-            FunctionEffect |= ModRef;
-          } else if (Function *Callee = CS.getCalledFunction()) {
-            // The callgraph doesn't include intrinsic calls.
-            if (Callee->isIntrinsic()) {
-              ModRefBehavior Behaviour =
-                  AliasAnalysis::getModRefBehavior(Callee);
-              FunctionEffect |= (Behaviour & ModRef);
-            }
-          }
-          continue;
-        }
-
-        // For all non-call instructions we use the primary predicates for
-        // whether they read or write memory.
-        if (I.mayReadFromMemory())
-          FunctionEffect |= Ref;
-        if (I.mayWriteToMemory())
-          FunctionEffect |= Mod;
-      }
-    }
-
-    if ((FunctionEffect & Mod) == 0)
-      ++NumReadMemFunctions;
-    if (FunctionEffect == 0)
-      ++NumNoMemFunctions;
-    FR.FunctionEffect = FunctionEffect;
-
-    // Finally, now that we know the full effect on this SCC, clone the
-    // information to each function in the SCC.
-    for (unsigned i = 1, e = SCC.size(); i != e; ++i)
-      FunctionInfo[SCC[i]->getFunction()] = FR;
-  }
-}
-
-/// alias - If one of the pointers is to a global that we are tracking, and the
-/// other is some random pointer, we know there cannot be an alias, because the
-/// address of the global isn't taken.
-AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) {
-  // Get the base object these pointers point to.
-  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
-  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
-
-  // If either of the underlying values is a global, they may be non-addr-taken
-  // globals, which we can answer queries about.
-  const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
-  const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
-  if (GV1 || GV2) {
-    // If the global's address is taken, pretend we don't know it's a pointer to
-    // the global.
-    if (GV1 && !NonAddressTakenGlobals.count(GV1))
-      GV1 = nullptr;
-    if (GV2 && !NonAddressTakenGlobals.count(GV2))
-      GV2 = nullptr;
-
-    // If the two pointers are derived from two different non-addr-taken
-    // globals, or if one is and the other isn't, we know these can't alias.
-    if ((GV1 || GV2) && GV1 != GV2)
-      return NoAlias;
-
-    // Otherwise if they are both derived from the same addr-taken global, we
-    // can't know the two accesses don't overlap.
-  }
-
-  // These pointers may be based on the memory owned by an indirect global. If
-  // so, we may be able to handle this. First check to see if the base pointer
-  // is a direct load from an indirect global.
- GV1 = GV2 = nullptr; - if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV1 = GV; - if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV2 = GV; - - // These pointers may also be from an allocation for the indirect global. If - // so, also handle them. - if (AllocsForIndirectGlobals.count(UV1)) - GV1 = AllocsForIndirectGlobals[UV1]; - if (AllocsForIndirectGlobals.count(UV2)) - GV2 = AllocsForIndirectGlobals[UV2]; - - // Now that we know whether the two pointers are related to indirect globals, - // use this to disambiguate the pointers. If either pointer is based on an - // indirect global and if they are not both based on the same indirect global, - // they cannot alias. - if ((GV1 || GV2) && GV1 != GV2) - return NoAlias; - - return AliasAnalysis::alias(LocA, LocB); -} - -AliasAnalysis::ModRefResult -GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - unsigned Known = ModRef; - - // If we are asking for mod/ref info of a direct call with a pointer to a - // global we are tracking, return information if we have it. - const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); - if (const GlobalValue *GV = - dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) - if (GV->hasLocalLinkage()) - if (const Function *F = CS.getCalledFunction()) - if (NonAddressTakenGlobals.count(GV)) - if (const FunctionRecord *FR = getFunctionInfo(F)) - Known = FR->getInfoForGlobal(GV); - - if (Known == NoModRef) - return NoModRef; // No need to query other mod/ref analyses - return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); -} - -//===----------------------------------------------------------------------===// -// Methods to update the analysis as a result of the client transformation. -// -void GlobalsModRef::deleteValue(Value *V) { - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - if (NonAddressTakenGlobals.erase(GV)) { - // This global might be an indirect global. If so, remove it and remove - // any AllocRelatedValues for it. - if (IndirectGlobals.erase(GV)) { - // Remove any entries in AllocsForIndirectGlobals for this global. - for (std::map<const Value *, const GlobalValue *>::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); - I != E;) { - if (I->second == GV) { - AllocsForIndirectGlobals.erase(I++); - } else { - ++I; - } - } - } - } - } - - // Otherwise, if this is an allocation related to an indirect global, remove - // it. - AllocsForIndirectGlobals.erase(V); - - AliasAnalysis::deleteValue(V); -} - -void GlobalsModRef::addEscapingUse(Use &U) { - // For the purposes of this analysis, it is conservatively correct to treat - // a newly escaping value equivalently to a deleted one. We could perhaps - // be more precise by processing the new use and attempting to update our - // saved analysis results to accommodate it. 
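
// ---- Illustrative aside (editorial, not part of this commit) ----
// The legacy pass's invalidation strategy, modelled stand-alone: a newly
// escaping value is handled by reusing the deletion path, which only ever
// drops cached facts and therefore stays conservative. Names invented:
#include <map>
#include <string>

struct ModRefCache {
  std::map<std::string, unsigned> CachedInfo;
  void onValueDeleted(const std::string &Name) { CachedInfo.erase(Name); }
  // Escape handled as deletion: correct but potentially imprecise.
  void onValueEscaped(const std::string &Name) { onValueDeleted(Name); }
};
// ---- end aside ----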
- deleteValue(U); - - AliasAnalysis::addEscapingUse(U); -} diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp deleted file mode 100644 index 806bfb8..0000000 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===-- IPA.cpp -----------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the common initialization routines for the IPA library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm-c/Initialization.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -/// initializeIPA - Initialize all passes linked into the IPA library. -void llvm::initializeIPA(PassRegistry &Registry) { - initializeCallGraphWrapperPassPass(Registry); - initializeCallGraphPrinterPass(Registry); - initializeCallGraphViewerPass(Registry); - initializeGlobalsModRefPass(Registry); -} - -void LLVMInitializeIPA(LLVMPassRegistryRef R) { - initializeIPA(*unwrap(R)); -} diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index 926787d..e0c5d8f 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ScalarEvolution>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); AU.setPreservesAll(); } @@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { *L->getHeader()->getParent()); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - SE = &getAnalysis<ScalarEvolution>(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); // Collect ephemeral values so that AddUsersIfInteresting skips them. EphValues.clear(); @@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(&*I); return false; } diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index c0d2e37..a86a703 100644 --- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee - /// itself. 
+  /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. + /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); // Custom analysis routines. @@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitSwitchInst(SwitchInst &SI); bool visitIndirectBrInst(IndirectBrInst &IBI); bool visitResumeInst(ResumeInst &RI); + bool visitCleanupReturnInst(CleanupReturnInst &RI); + bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); public: @@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } return Base::visitCallSite(CS); @@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { return false; } +bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a cleanupret instruction. + return false; +} + +bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a catchret instruction. + return false; +} + bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { // FIXME: It might be reasonable to discount the cost of instructions leading @@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, continue; // Skip ephemeral values. - if (EphValues.count(I)) + if (EphValues.count(&*I)) continue; ++NumInstructions; if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; - // If the instruction is floating point, and the target says this operation is - // expensive or the function has the "use-soft-float" attribute, this may - // eventually become a library call. Treat the cost as such. + // If the instruction is floating point, and the target says this operation + // is expensive or the function has the "use-soft-float" attribute, this may + // eventually become a library call. Treat the cost as such. if (I->getType()->isFloatingPointTy()) { bool hasSoftFloatAttr = false; - // If the function has the "use-soft-float" attribute, mark it as expensive. + // If the function has the "use-soft-float" attribute, mark it as + // expensive. if (F.hasFnAttribute("use-soft-float")) { Attribute Attr = F.getFnAttribute("use-soft-float"); StringRef Val = Attr.getValueAsString(); @@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining.
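For context on the changed bonus computation in visitCallSite above: a minimal arithmetic sketch, with illustrative numbers rather than the real InlineConstants values.

    #include <algorithm>

    // If the indirect callee would be inlinable on its own, the outer call is
    // credited with whatever headroom the nested analysis had left.
    int indirectCallBonus(int NestedThreshold, int NestedCost) {
      // e.g. NestedThreshold = 225, NestedCost = 100 -> a bonus of 125 is
      // subtracted from the outer Cost; a nested cost at or above the
      // threshold clamps to a zero bonus rather than becoming a penalty.
      return std::max(0, NestedThreshold - NestedCost);
    }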
- if (Base::visit(I)) + if (Base::visit(&*I)) ++NumInstructionsSimplified; else Cost += InlineConstants::InstrCost; @@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FAI != FAE; ++FAI, ++CAI) { assert(CAI != CS.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) - SimplifiedValues[FAI] = C; + SimplifiedValues[&*FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (isa<AllocaInst>(PtrArg)) { - SROAArgValues[FAI] = PtrArg; + SROAArgValues[&*FAI] = PtrArg; SROAArgCosts[PtrArg] = 0; } } @@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); - return Cost < Threshold; + return Cost <= std::max(0, Threshold); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() { } #endif -INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) - -char InlineCostAnalysis::ID = 0; - -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {} - -InlineCostAnalysis::~InlineCostAnalysis() {} - -void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); - ACT = &getAnalysis<AssumptionCacheTracker>(); - return false; -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - /// \brief Test that two functions either have or have not the given attribute /// at the same time. template<typename AttrKind> @@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI) { - return TTI.hasCompatibleFunctionAttributes(Caller, Callee) && - attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && - attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && - attributeMatches(Caller, Callee, Attribute::SanitizeThread); + return TTI.areInlineCompatible(Caller, Callee) && + AttributeFuncs::areInlineCompatible(*Caller, *Callee); +} + +InlineCost llvm::getInlineCost(CallSite CS, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT); } -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { +InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { // Cannot inline indirect calls. 
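With the InlineCostAnalysis pass deleted above, clients call the new free functions declared in InlineCost.h directly. A hedged usage sketch, assuming a legacy pass context; the direct-call site, Threshold value, and error handling are placeholders, not code from the patch.

    // Inside a hypothetical legacy pass, for a direct call site CS:
    TargetTransformInfo &CalleeTTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*CS.getCalledFunction());
    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    InlineCost IC = llvm::getInlineCost(CS, Threshold, CalleeTTI, ACT);
    if (IC) {  // InlineCost converts to bool: cost came in under the threshold
      InlineFunctionInfo IFI;
      InlineFunction(CS, IFI);
    }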
if (!Callee) return llvm::InlineCost::getNever(); @@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, - TTIWP->getTTI(*Callee))) + if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI)) return llvm::InlineCost::getNever(); // Don't inline this call if the caller has the optnone attribute. @@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS); + CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool InlineCostAnalysis::isInlineViable(Function &F) { +bool llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches or @@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken()) return false; - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - CallSite CS(II); + for (auto &II : *BI) { + CallSite CS(&II); if (!CS) continue; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index a7f8f5c..b89ff26 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return DT->dominates(I, P); } - // Otherwise, if the instruction is in the entry block, and is not an invoke, + // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && !isa<InvokeInst>(I)) @@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL, // Is the set of underlying objects all noalias calls? auto IsNAC = [](SmallVectorImpl<Value *> &Objects) { - return std::all_of(Objects.begin(), Objects.end(), - [](Value *V){ return isNoAliasCall(V); }); + return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall); }; // Is the set of underlying objects all things which must be disjoint from @@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // X >=u 1 -> X if (match(RHS, m_One())) return LHS; + if (isImpliedCondition(RHS, LHS, Q.DL)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SGE: + /// For signed comparison, the values for an i1 are 0 and -1 + /// respectively. 
This maps into a truth table of: + /// LHS | RHS | LHS >=s RHS | LHS implies RHS + /// 0 | 0 | 1 (0 >= 0) | 1 + /// 0 | 1 | 1 (0 >= -1) | 1 + /// 1 | 0 | 0 (-1 >= 0) | 0 + /// 1 | 1 | 1 (-1 >= -1) | 1 + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); break; case ICmpInst::ICMP_SLT: // X <s 0 -> X @@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (match(RHS, m_One())) return LHS; break; + case ICmpInst::ICMP_ULE: + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); + break; } } @@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { // 'and x, CI2' produces [0, CI2]. Upper = CI2->getValue() + 1; + } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) { + // 'add nuw x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); } - if (Lower != Upper) { - ConstantRange LHS_CR = ConstantRange(Lower, Upper); + + ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) + : ConstantRange(Width, true); + + if (auto *I = dyn_cast<Instruction>(LHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); + + if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(RHS->getContext()); if (RHS_CR.inverse().contains(LHS_CR)) @@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If both operands have range metadata, use the metadata + // to simplify the comparison. + if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) { + auto RHS_Instr = dyn_cast<Instruction>(RHS); + auto LHS_Instr = dyn_cast<Instruction>(LHS); + + if (RHS_Instr->getMetadata(LLVMContext::MD_range) && + LHS_Instr->getMetadata(LLVMContext::MD_range)) { + auto RHS_CR = getConstantRangeFromMetadata( + *RHS_Instr->getMetadata(LLVMContext::MD_range)); + auto LHS_CR = getConstantRangeFromMetadata( + *LHS_Instr->getMetadata(LLVMContext::MD_range)); + + auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); + if (Satisfied_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + + auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( + CmpInst::getInversePredicate(Pred), RHS_CR); + if (InversedSatisfied_CR.contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + // Compare of cast, for example (zext X) != 0 -> X != 0 if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { Instruction *LI = cast<CastInst>(LHS); @@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp eq|ne X, Y -> false|true if X != Y + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + LLVMContext &Ctx = LHS->getType()->getContext(); + return Pred == ICmpInst::ICMP_NE ? + ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + } + // Special logic for binary operators. 
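The new range-metadata block above folds a comparison when every value the LHS range allows satisfies the predicate against every value the RHS range allows. A small self-contained sketch of that reasoning with ConstantRange; the 16-bit width and the two ranges are invented for illustration.

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Models "icmp ult %a, %b" where %a has !range [0, 10) and %b has
    // !range [10, 20).
    bool foldsToTrue() {
      ConstantRange A(APInt(16, 0), APInt(16, 10));
      ConstantRange B(APInt(16, 10), APInt(16, 20));
      // All LHS values that are ult *every* element of B, i.e. [0, 10).
      ConstantRange Satisfied =
          ConstantRange::makeSatisfyingICmpRegion(ICmpInst::ICMP_ULT, B);
      return Satisfied.contains(A); // true, so the icmp simplifies to true
    }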
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); @@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, - Instruction *CxtI) { + const Instruction *CxtI) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, break; } + // In general, it is possible for computeKnownBits to determine all bits in a + // value even when the operands are not all constants. + if (!Result && I->getType()->isIntegerTy()) { + unsigned BitWidth = I->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); + if ((KnownZero | KnownOne).isAllOnesValue()) + Result = ConstantInt::get(I->getContext(), KnownOne); + } + /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index c8d0410..0f0f31e 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { assert(CalleeC.isDescendantOf(*this) && "Callee must be a descendant of the Caller."); - // The only change required is to add this SCC to the parent set of the callee. + // The only change required is to add this SCC to the parent set of the + // callee. CalleeC.ParentSCCs.insert(this); } @@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS( } SmallVector<LazyCallGraph::SCC *, 1> -LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, - Node &CalleeN) { +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) { // First remove it from the node. CallerN.removeEdgeInternal(CalleeN.getFunction()); @@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, // the leaf SCC list. if (!IsLeafSCC && !ResultSCCs.empty()) G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), - G->LeafSCCs.end()); + G->LeafSCCs.end()); // Return the new list of SCCs. return ResultSCCs; diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index a6ae7f2..0d1d34e 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -64,10 +65,10 @@ class LVILatticeVal { enum LatticeValueTy { /// This Value has no known value yet. undefined, - + /// This Value has a specific constant value. constant, - + /// This Value is known to not have the specified value. notconstant, @@ -77,13 +78,13 @@ class LVILatticeVal { /// This value is not known to be constant, and we know that it has a value. overdefined }; - + /// Val: This stores the current lattice value along with the Constant* for /// the constant if this is a 'constant' or 'notconstant' value. 
LatticeValueTy Tag; Constant *Val; ConstantRange Range; - + public: LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} @@ -104,29 +105,34 @@ public: Res.markConstantRange(CR); return Res; } + static LVILatticeVal getOverdefined() { + LVILatticeVal Res; + Res.markOverdefined(); + return Res; + } bool isUndefined() const { return Tag == undefined; } bool isConstant() const { return Tag == constant; } bool isNotConstant() const { return Tag == notconstant; } bool isConstantRange() const { return Tag == constantrange; } bool isOverdefined() const { return Tag == overdefined; } - + Constant *getConstant() const { assert(isConstant() && "Cannot get the constant of a non-constant!"); return Val; } - + Constant *getNotConstant() const { assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); return Val; } - + ConstantRange getConstantRange() const { assert(isConstantRange() && "Cannot get the constant-range of a non-constant-range!"); return Range; } - + /// Return true if this is a change in status. bool markOverdefined() { if (isOverdefined()) @@ -150,7 +156,7 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); @@ -168,27 +174,27 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markConstantRange(const ConstantRange NewR) { if (isConstantRange()) { if (NewR.isEmptySet()) return markOverdefined(); - + bool changed = Range != NewR; Range = NewR; return changed; } - + assert(isUndefined()); if (NewR.isEmptySet()) return markOverdefined(); - + Tag = constantrange; Range = NewR; return true; } - + /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { @@ -267,7 +273,7 @@ public: return markConstantRange(NewR); } }; - + } // end anonymous namespace. namespace llvm { @@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; - struct LVIValueHandle : public CallbackVH { + struct LVIValueHandle final : public CallbackVH { LazyValueInfoCache *Parent; - + LVIValueHandle(Value *V, LazyValueInfoCache *P) : CallbackVH(V), Parent(P) { } @@ -308,24 +314,27 @@ namespace { }; } -namespace { +namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. - typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; + /// Over-defined lattice values are recorded in OverDefinedCache to reduce + /// memory overhead. + typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> + ValueCacheEntryTy; /// This is all of the cached information for all values, /// mapped from Value* to key information. std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; - + /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. This is required - /// for cache updating. - typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; - DenseSet<OverDefinedPairTy> OverDefinedCache; + /// over-defined at the end of that block. 
+ typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>> + OverDefinedCacheTy; + OverDefinedCacheTy OverDefinedCache; /// Keep track of all blocks that we have ever seen, so we /// don't spend time removing unused blocks from our caches. @@ -357,9 +366,13 @@ namespace { void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { SeenBlocks.insert(BB); - lookup(Val)[BB] = Result; + + // Insert over-defined values into their own cache to reduce memory + // overhead. if (Result.isOverdefined()) - OverDefinedCache.insert(std::make_pair(BB, Val)); + OverDefinedCache[BB].insert(Val); + else + lookup(Val)[BB] = Result; } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); @@ -382,11 +395,39 @@ namespace { Instruction *BBI); void solve(); - + ValueCacheEntryTy &lookup(Value *V) { return ValueCache[LVIValueHandle(V, this)]; } + bool isOverdefined(Value *V, BasicBlock *BB) const { + auto ODI = OverDefinedCache.find(BB); + + if (ODI == OverDefinedCache.end()) + return false; + + return ODI->second.count(V); + } + + bool hasCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return true; + + LVIValueHandle ValHandle(V, this); + auto I = ValueCache.find(ValHandle); + if (I == ValueCache.end()) + return false; + + return I->second.count(BB); + } + + LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return LVILatticeVal::getOverdefined(); + + return lookup(V)[BB]; + } + public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. @@ -402,15 +443,15 @@ namespace { /// value for the specified Value* that is true on the specified edge. LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// This is the update interface to inform the cache that an edge from /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - + /// This is part of the update interface to inform the cache /// that a block has been deleted. void eraseBlock(BasicBlock *BB); - + /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -425,15 +466,17 @@ namespace { } // end anonymous namespace void LVIValueHandle::deleted() { - typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; - - SmallVector<OverDefinedPairTy, 4> ToErase; - for (const OverDefinedPairTy &P : Parent->OverDefinedCache) - if (P.second == getValPtr()) - ToErase.push_back(P); - for (const OverDefinedPairTy &P : ToErase) - Parent->OverDefinedCache.erase(P); - + SmallVector<AssertingVH<BasicBlock>, 4> ToErase; + for (auto &I : Parent->OverDefinedCache) { + SmallPtrSetImpl<Value *> &ValueSet = I.second; + if (ValueSet.count(getValPtr())) + ValueSet.erase(getValPtr()); + if (ValueSet.empty()) + ToErase.push_back(I.first); + } + for (auto &BB : ToErase) + Parent->OverDefinedCache.erase(BB); + // This erasure deallocates *this, so it MUST happen after we're done // using any and all members of *this. 
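For reference, the shape change behind the new OverDefinedCache, as a simplified sketch; the declarations below are illustrative stand-ins, not the patch's own types (the real cache keys on AssertingVH value handles).

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/DenseSet.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/BasicBlock.h"
    #include <utility>
    using namespace llvm;

    // Before: one flat set keyed by (block, value) pairs, scanned linearly
    // whenever a block or value died.
    DenseSet<std::pair<BasicBlock *, Value *>> OldOverDefined;
    // After: a small per-block set, so over-defined values stay out of the
    // per-value lattice map and erasing a block is a single map lookup.
    DenseMap<BasicBlock *, SmallPtrSet<Value *, 4>> NewOverDefined;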
Parent->ValueCache.erase(*this); @@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { return; SeenBlocks.erase(I); - SmallVector<OverDefinedPairTy, 4> ToErase; - for (const OverDefinedPairTy& P : OverDefinedCache) - if (P.first == BB) - ToErase.push_back(P); - for (const OverDefinedPairTy &P : ToErase) - OverDefinedCache.erase(P); + auto ODI = OverDefinedCache.find(BB); + if (ODI != OverDefinedCache.end()) + OverDefinedCache.erase(ODI); - for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator - I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) + for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) I->second.erase(BB); } @@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() { if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); - assert(lookup(e.second).count(e.first) && "Result should be in cache!"); + assert(hasCachedValueInfo(e.second, e.first) && + "Result should be in cache!"); BlockValueStack.pop(); BlockValueSet.erase(e); @@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { if (isa<Constant>(Val)) return true; - LVIValueHandle ValHandle(Val, this); - std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = - ValueCache.find(ValHandle); - if (I == ValueCache.end()) return false; - return I->second.count(BB); + return hasCachedValueInfo(Val, BB); } LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { return LVILatticeVal::get(VC); SeenBlocks.insert(BB); - return lookup(Val)[BB]; + return getCachedValueInfo(Val, BB); +} + +static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { + switch (BBI->getOpcode()) { + default: break; + case Instruction::Load: + case Instruction::Call: + case Instruction::Invoke: + if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range)) + if (isa<IntegerType>(BBI->getType())) { + ConstantRange Result = getConstantRangeFromMetadata(*Ranges); + return LVILatticeVal::getRange(Result); + } + break; + }; + // Nothing known - Note that we do not want overdefined here. We may know + // something else about the value and not having range metadata shouldn't + // cause us to throw away those facts. + return LVILatticeVal(); } bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { if (isa<Constant>(Val)) return true; - if (lookup(Val).count(BB)) { + if (hasCachedValueInfo(Val, BB)) { // If we have a cached value, use that. DEBUG(dbgs() << " reuse BB '" << BB->getName() - << "' val=" << lookup(Val)[BB] << '\n'); + << "' val=" << getCachedValueInfo(Val, BB) << '\n'); // Since we're reusing a cached value, we don't need to update the // OverDefinedCache. The cache will have been properly updated whenever the @@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { // Hold off inserting this value into the Cache in case we have to return // false and come back later. 
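A hedged sketch of what getFromRangeMetadata above computes for a concrete load; LI is a placeholder LoadInst*, the metadata values are invented, and LVILatticeVal is the file-local lattice type shown earlier in this diff.

    // Given IR such as:
    //   %x = load i32, i32* %p, !range !0
    //   !0 = !{i32 0, i32 100}
    // the helper yields a constantrange lattice value of [0, 100):
    if (MDNode *Ranges = LI->getMetadata(LLVMContext::MD_range))
      if (isa<IntegerType>(LI->getType())) {
        ConstantRange CR = getConstantRangeFromMetadata(*Ranges); // [0, 100)
        LVILatticeVal V = LVILatticeVal::getRange(CR);
        (void)V; // solveBlockValue merges this in via Res.mergeIn(...)
      }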
LVILatticeVal Res; - + Instruction *BBI = dyn_cast<Instruction>(Val); if (!BBI || BBI->getParent() != BB) { if (!solveBlockValueNonLocal(Res, Val, BB)) @@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { return true; } - if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { - Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + // If this value is a nonnull pointer, record its range and bail out. + PointerType *PT = dyn_cast<PointerType>(BBI->getType()); + if (PT && isKnownNonNull(BBI)) { + Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT)); insertResult(Val, BB, Res); return true; } + // If this is an instruction which supports range metadata, return the + // implied range. TODO: This should be an intersection, not a union. + Res.mergeIn(getFromRangeMetadata(BBI), DL); + // We can only analyze the definitions of certain classes of instructions // (integral binops and casts at the moment), so bail if this isn't one. LVILatticeVal Result; @@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } - + BBLV = Result; return true; } @@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, BBLV = Result; return true; } - + bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB) { LVILatticeVal Result; // Start Undefined. @@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, if (Result.isOverdefined()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred.\n"); - + BBLV = Result; return true; } @@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, BBLV.markOverdefined(); return true; } - + ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); @@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, case Instruction::Or: Result.markConstantRange(LHSRange.binaryOr(RHSRange)); break; - + // Unhandled instructions are overdefined. default: DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, Result.markOverdefined(); break; } - + BBLV = Result; return true; } @@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, /// Val is not constrained on the edge. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, LVILatticeVal &Result) { - // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { // If this is a conditional branch and only one successor goes to BBTo, then @@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, bool isTrueDest = BI->getSuccessor(0) == BBTo; assert(BI->getSuccessor(!isTrueDest) == BBTo && "BBTo isn't a successor of BBFrom"); - + // If V is the condition of the branch itself, then we know exactly what // it is.
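As an aside on the edge logic in getEdgeValueLocal: a usage sketch of the public interface it ultimately serves, declared in llvm/Analysis/LazyValueInfo.h. X, BBFrom and TrueSucc are placeholders for the value and blocks in question.

    // Given:
    //   %c = icmp eq i32 %x, 0
    //   br i1 %c, label %TrueSucc, label %FalseSucc
    // the edge BBFrom -> TrueSucc pins %x to the constant 0:
    LazyValueInfo &LVI = getAnalysis<LazyValueInfo>();
    if (Constant *C = LVI.getConstantOnEdge(X, BBFrom, TrueSucc, /*CxtI=*/nullptr)) {
      // Here C would be the i32 constant 0.
    }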
if (BI->getCondition() == Val) { @@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, Type::getInt1Ty(Val->getContext()), isTrueDest)); return true; } - + // If the condition of the branch is an equality comparison, we may be // able to infer the value. if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) @@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - + assert(BlockValueStack.empty() && BlockValueSet.empty()); pushBlockValue(std::make_pair(BB, V)); @@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { << CxtI->getName() << "'\n"); LVILatticeVal Result; + if (auto *I = dyn_cast<Instruction>(V)) + Result = getFromRangeMetadata(I); mergeAssumeBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); @@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); - + LVILatticeVal Result; if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { solve(); @@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - // When an edge in the graph has been threaded, values that we could not - // determine a value for before (i.e. were marked overdefined) may be possible - // to solve now. We do NOT try to proactively update these values. Instead, - // we clear their entries from the cache, and allow lazy updating to recompute - // them when needed. - + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be + // possible to solve now. We do NOT try to proactively update these values. + // Instead, we clear their entries from the cache, and allow lazy updating to + // recompute them when needed. + // The updating process is fairly simple: we need to drop cached info // for all values that were marked overdefined in OldSucc, and for those same // values in any successor of OldSucc (except NewSucc) in which they were // also marked overdefined. std::vector<BasicBlock*> worklist; worklist.push_back(OldSucc); - - DenseSet<Value*> ClearSet; - for (OverDefinedPairTy &P : OverDefinedCache) - if (P.first == OldSucc) - ClearSet.insert(P.second); - + + auto I = OverDefinedCache.find(OldSucc); + if (I == OverDefinedCache.end()) + return; // Nothing to process here. + SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end()); + // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already // visited will have had their overdefined markers cleared already, and we @@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, while (!worklist.empty()) { BasicBlock *ToUpdate = worklist.back(); worklist.pop_back(); - + // Skip blocks only accessible through NewSucc. if (ToUpdate == NewSucc) continue; - + bool changed = false; - for (Value *V : ClearSet) { + for (Value *V : ValsToClear) { // If a value was marked overdefined in OldSucc, and is here too... 
- DenseSet<OverDefinedPairTy>::iterator OI = - OverDefinedCache.find(std::make_pair(ToUpdate, V)); - if (OI == OverDefinedCache.end()) continue; - - // Remove it from the caches. - ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; - ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); - - assert(CI != Entry.end() && "Couldn't find entry to update?"); - Entry.erase(CI); - OverDefinedCache.erase(OI); - - // If we removed anything, then we potentially need to update + auto OI = OverDefinedCache.find(ToUpdate); + if (OI == OverDefinedCache.end()) + continue; + SmallPtrSetImpl<Value *> &ValueSet = OI->second; + if (!ValueSet.count(V)) + continue; + + ValueSet.erase(V); + if (ValueSet.empty()) + OverDefinedCache.erase(OI); + + // If we removed anything, then we potentially need to update // blocks successors too. changed = true; } if (!changed) continue; - + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); } } @@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, } /// Determine whether the specified value is known to be a -/// constant on the specified edge. Return null if not. +/// constant on the specified edge. Return null if not. Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { @@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - + if (Result.isConstantRange()) { ConstantInt *CI = dyn_cast<ConstantInt>(C); if (!CI) return LazyValueInfo::Unknown; - + ConstantRange CR = Result.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::False; } - + // Handle more complex predicates. ConstantRange TrueValues = ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); @@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return LazyValueInfo::False; return LazyValueInfo::Unknown; } - + if (Result.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". @@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, } return LazyValueInfo::Unknown; } - + return LazyValueInfo::Unknown; } @@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, if (Ret != Unknown) return Ret; - // TODO: Move this logic inside getValueAt so that it can be cached rather - // than re-queried on each call. This would also allow us to merge the - // underlying lattice values to get more information + // Note: The following bit of code is somewhat distinct from the rest of LVI; + // LVI as a whole tries to compute a lattice value which is conservatively + // correct at a given location. In this case, we have a predicate which we + // weren't able to prove about the merged result, and we're pushing that + // predicate back along each incoming edge to see if we can prove it + // separately for each input. As a motivating example, consider: + // bb1: + // %v1 = ... ; constantrange<1, 5> + // br label %merge + // bb2: + // %v2 = ... 
; constantrange<10, 20> + // br label %merge + // merge: + // %phi = phi [%v1, %v2] ; constantrange<1,20> + // %pred = icmp eq i32 %phi, 8 + // We can't tell from the lattice value for '%phi' that '%pred' is false + // along each path, but by checking the predicate over each input separately, + // we can. + // We limit the search to one step backwards from the current BB and value. + // We could consider extending this to search further backwards through the + // CFG and/or value graph, but there are non-obvious compile time vs quality + // tradeoffs. if (CxtI) { - // For a comparison where the V is outside this block, it's possible - // that we've branched on it before. Look to see if the value is known - // on all incoming edges. BasicBlock *BB = CxtI->getParent(); + + // Function entry or an unreachable block. Bail to avoid confusing + // analysis below. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (PI != PE && - (!isa<Instruction>(V) || - cast<Instruction>(V)->getParent() != BB)) { + if (PI == PE) + return Unknown; + + // If V is a PHI node in the same block as the context, we need to ask + // questions about the predicate as applied to the incoming value along + // each edge. This is useful for eliminating cases where the predicate is + // known along all incoming edges. + if (auto *PHI = dyn_cast<PHINode>(V)) + if (PHI->getParent() == BB) { + Tristate Baseline = Unknown; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) { + Value *Incoming = PHI->getIncomingValue(i); + BasicBlock *PredBB = PHI->getIncomingBlock(i); + // Note that PredBB may be BB itself. + Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, + CxtI); + + // Keep going as long as we've seen a consistent known result for + // all inputs. + Baseline = (i == 0) ? Result /* First iteration */ + : (Baseline == Result ? Baseline : Unknown); /* All others */ + if (Baseline == Unknown) + break; + } + if (Baseline != Unknown) + return Baseline; + } + + // For a comparison where the V is outside this block, it's possible + // that we've branched on it before. Look to see if the value is known + // on all incoming edges. + if (!isa<Instruction>(V) || + cast<Instruction>(V)->getParent() != BB) { // For predecessor edge, determine if the comparison is true or false - // on that edge. If they're all true or all false, we can conclude + // on that edge. If they're all true or all false, we can conclude // the value of the comparison in this block. Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI); if (Baseline != Unknown) { diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp deleted file mode 100644 index 991a0e3..0000000 --- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LibCallAliasAnalysis class. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -using namespace llvm; - -// Register this pass... -char LibCallAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", - "LibCall Alias Analysis", false, true, false) - -FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { - return new LibCallAliasAnalysis(LCI); -} - -LibCallAliasAnalysis::~LibCallAliasAnalysis() { - delete LCI; -} - -void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code -} - -bool LibCallAliasAnalysis::runOnFunction(Function &F) { - // set up super class - InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); - return false; -} - -/// AnalyzeLibCallDetails - Given a call to a function with the specified -/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call -/// vs the specified pointer/size. -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, - const MemoryLocation &Loc) { - // If we have a function, check to see what kind of mod/ref effects it - // has. Start by including any info globally known about the function. - AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; - if (MRInfo == NoModRef) return MRInfo; - - // If that didn't tell us that the function is 'readnone', check to see - // if we have detailed info and if 'P' is any of the locations we know - // about. - const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == nullptr) - return MRInfo; - - // If the details array is of the 'DoesNot' kind, we only know something if - // the pointer is a match for one of the locations in 'Details'. If we find a - // match, we can prove some interactions cannot happen. - // - if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { - // Find out if the pointer refers to a known location. - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res != LibCallLocationInfo::Yes) continue; - - // If we find a match against a location that we 'do not' interact with, - // learn this info into MRInfo. - return ModRefResult(MRInfo & ~Details[i].MRInfo); - } - return MRInfo; - } - - // If the details are of the 'DoesOnly' sort, we know something if the pointer - // is a match for one of the locations in 'Details'. Also, if we can prove - // that the pointers is *not* one of the locations in 'Details', we know that - // the call is NoModRef. - assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); - - // Find out if the pointer refers to a known location. - bool NoneMatch = true; - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res == LibCallLocationInfo::No) continue; - - // If we don't know if this pointer points to the location, then we have to - // assume it might alias in some case. 
- if (Res == LibCallLocationInfo::Unknown) { - NoneMatch = false; - continue; - } - - // If we know that this pointer definitely is pointing into the location, - // merge in this information. - return ModRefResult(MRInfo & Details[i].MRInfo); - } - - // If we found that the pointer is guaranteed to not match any of the - // locations in our 'DoesOnly' rule, then we know that the pointer must point - // to some other location. Since the libcall doesn't mod/ref any other - // locations, return NoModRef. - if (NoneMatch) - return NoModRef; - - // Otherwise, return any other info gained so far. - return MRInfo; -} - -// getModRefInfo - Check to see if the specified callsite can clobber the -// specified memory object. -// -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - ModRefResult MRInfo = ModRef; - - // If this is a direct call to a function that LCI knows about, get the - // information about the runtime function. - if (LCI) { - if (const Function *F = CS.getCalledFunction()) { - if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { - MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); - if (MRInfo == NoModRef) return NoModRef; - } - } - } - - // The AliasAnalysis base class has some smarts, lets use them. - return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); -} diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp deleted file mode 100644 index 003c81e..0000000 --- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- LibCallSemantics.cpp - Describe library semantics ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements interfaces that can be used to describe language -// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM -// optimizers. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Function.h" -using namespace llvm; - -/// This impl pointer in ~LibCallInfo is actually a StringMap. This -/// helper does the cast. -static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) { - return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr); -} - -LibCallInfo::~LibCallInfo() { - delete getMap(Impl); -} - -const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { - // Get location info on the first call. - if (NumLocations == 0) - NumLocations = getLocationInfo(Locations); - - assert(LocID < NumLocations && "Invalid location ID!"); - return Locations[LocID]; -} - - -/// Return the LibCallFunctionInfo object corresponding to -/// the specified function if we have it. If not, return null. -const LibCallFunctionInfo * -LibCallInfo::getFunctionInfo(const Function *F) const { - StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); - - /// If this is the first time we are querying for this info, lazily construct - /// the StringMap to index it. 
- if (!Map) { - Impl = Map = new StringMap<const LibCallFunctionInfo*>(); - - const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (!Array) return nullptr; - - // We now have the array of entries. Populate the StringMap. - for (unsigned i = 0; Array[i].Name; ++i) - (*Map)[Array[i].Name] = Array+i; - } - - // Look up this function in the string map. - return Map->lookup(F->getName()); -} - -/// See if the given exception handling personality function is one that we -/// understand. If so, return a description of it; otherwise return Unknown. -EHPersonality llvm::classifyEHPersonality(const Value *Pers) { - const Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); - if (!F) - return EHPersonality::Unknown; - return StringSwitch<EHPersonality>(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Default(EHPersonality::Unknown); -} - -bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { - EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); - // We can't simplify any invokes to nounwind functions if the personality - // function wants to catch asynch exceptions. The nounwind attribute only - // implies that the function does not throw synchronous exceptions. - return !isAsynchronousEHPersonality(Personality); -} diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 0b9308a..2dfb09c 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -49,6 +49,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" @@ -98,12 +99,13 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); - Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; - Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const; public: Module *Mod; + const DataLayout *DL; AliasAnalysis *AA; AssumptionCache *AC; DominatorTree *DT; @@ -121,7 +123,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); @@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) @@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", // bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); - 
AA = &getAnalysis<AliasAnalysis>(); + DL = &F.getParent()->getDataLayout(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - const DataLayout &DL = CS->getModule()->getDataLayout(); visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, MemRef::Callee); - if (Function *F = dyn_cast<Function>(findValue(Callee, DL, + if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert(CS.getCallingConv() == F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", @@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) { for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { - Argument *Formal = PI++; + Argument *Formal = &*PI++; Assert(Formal->getType() == Actual->getType(), "Undefined behavior: Call argument type mismatches " "callee parameter type", @@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) { if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); - visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - DL.getABITypeAlignment(Ty), Ty, + visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), + DL->getABITypeAlignment(Ty), Ty, MemRef::Read | MemRef::Write); } } @@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); + Value *Obj = findValue(*AI, /*OffsetOk=*/true); Assert(!isa<AllocaInst>(Obj), "Undefined behavior: Call with \"tail\" keyword references " "alloca", @@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. 
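The two Lint hunks above follow the general recipe for migrating a legacy pass off the old AliasAnalysis analysis group onto the new aggregation; a minimal sketch of the pattern, with MyPass as a hypothetical pass.

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AAResultsWrapperPass>(); // was: AU.addRequired<AliasAnalysis>();
    }

    bool MyPass::runOnFunction(Function &F) {
      AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
      // was: AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
      (void)AA;
      return false;
    }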
uint64_t Size = 0; if (const ConstantInt *Len = - dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL, + dyn_cast<ConstantInt>(findValue(MCI->getLength(), /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); @@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) { visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; - - case Intrinsic::eh_begincatch: - visitEHBeginCatch(II); - break; - case Intrinsic::eh_endcatch: - visitEHEndCatch(II); - break; } } @@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) { "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { - Value *Obj = - findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); + Value *Obj = findValue(V, /*OffsetOk=*/true); Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); } } @@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I, if (Size == 0) return; - Value *UnderlyingObject = - findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); Assert(!isa<ConstantPointerNull>(UnderlyingObject), "Undefined behavior: Null pointer dereference", &I); Assert(!isa<UndefValue>(UnderlyingObject), @@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, // Check for buffer overflows and misalignment. // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. - auto &DL = I.getModule()->getDataLayout(); int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is // something we can handle and if so extract the size of this base object // along with its alignment. @@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { Type *ATy = AI->getAllocatedType(); if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = DL.getTypeAllocSize(ATy); + BaseSize = DL->getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL.getABITypeAlignment(ATy); + BaseAlign = DL->getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); if (GTy->isSized()) - BaseSize = DL.getTypeAllocSize(GTy); + BaseSize = DL->getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL.getABITypeAlignment(GTy); + BaseAlign = DL->getABITypeAlignment(GTy); } } @@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I, // Accesses that say that the memory is more aligned than it is are not // defined. 
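Worked numbers for the misalignment check that follows, as a minimal sketch; the values are illustrative. MinAlign (from llvm/Support/MathExtras.h) keeps the largest power of two dividing both of its arguments.

    #include "llvm/Support/MathExtras.h"
    using namespace llvm;

    bool misaligned() {
      uint64_t BaseAlign = 16, Offset = 4; // properties of the base object
      unsigned Align = 8;                  // alignment claimed by the access
      // MinAlign(16, 4) == 4, so a claimed alignment of 8 trips the Assert.
      return BaseAlign && Align > MinAlign(BaseAlign, Offset); // true
    }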
if (Align == 0 && Ty && Ty->isSized()) - Align = DL.getABITypeAlignment(Ty); + Align = DL->getABITypeAlignment(Ty); Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I, void Lint::visitLoadInst(LoadInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getType()), I.getAlignment(), + DL->getTypeStoreSize(I.getType()), I.getAlignment(), I.getType(), MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getOperand(0)->getType()), + DL->getTypeStoreSize(I.getOperand(0)->getType()), I.getAlignment(), I.getOperand(0)->getType(), MemRef::Write); } @@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) { } void Lint::visitLShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast<ConstantInt>( - findValue(I.getOperand(1), I.getModule()->getDataLayout(), - /*OffsetOk=*/false))) + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), + /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } -static bool -allPredsCameFromLandingPad(BasicBlock *BB, - SmallSet<BasicBlock *, 4> &VisitedBlocks) { - VisitedBlocks.insert(BB); - if (BB->isLandingPad()) - return true; - // If we find a block with no predecessors, the search failed. - if (pred_empty(BB)) - return false; - for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) - return false; - } - return true; -} - -static bool -allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, - IntrinsicInst **SecondBeginCatch, - SmallSet<BasicBlock *, 4> &VisitedBlocks) { - VisitedBlocks.insert(BB); - for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { - IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) - return true; - // If we find another begincatch while looking for an endcatch, - // that's also an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { - *SecondBeginCatch = IC; - return false; - } - } - - // If we reach a block with no successors while searching, the - // search has failed. - if (succ_empty(BB)) - return false; - // Otherwise, search all of the successors. 
- for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; - if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHBeginCatch(IntrinsicInst *II) { - // The checks in this function make a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block that isn't dominated by the catch - // block while we are searching the catch block's successors for a call - // to end catch intrinsic, then it is possible that it will be legal for - // a path through this block to never reach a call to llvm.eh.endcatch. - // An analogous statement could be made about our search for a landing - // pad among the catch block's predecessors. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.begincatch without having previously visited - // a landingpad instruction and that no path is possible at runtime that - // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch - // (mentally adjusting for the fact that in reality these calls will be - // removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - SmallSet<BasicBlock *, 4> VisitedBlocks; - BasicBlock *CatchBB = II->getParent(); - - // The begin catch must occur in a landing pad block or all paths - // to it must have come from a landing pad. - Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), - "llvm.eh.begincatch may be reachable without passing a landingpad", - II); - - // Reset the visited block list. - VisitedBlocks.clear(); - - IntrinsicInst *SecondBeginCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. - bool EndCatchFound = allSuccessorsReachEndCatch( - CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), - &SecondBeginCatch, VisitedBlocks); - Assert( - SecondBeginCatch == nullptr, - "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", - II, SecondBeginCatch); - Assert(EndCatchFound, - "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", - II); -} - -static bool allPredCameFromBeginCatch( - BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, - IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { - VisitedBlocks.insert(BB); - // Look for a begincatch in this block. - for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; - ++RI) { - IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) - return true; - // If we find another end catch before we find a begin catch, that's - // an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { - *SecondEndCatch = IC; - return false; - } - // If we encounter a landingpad instruction, the search failed. - if (isa<LandingPadInst>(*RI)) - return false; - } - // If while searching we find a block with no predecessors, - // the search failed. - if (pred_empty(BB)) - return false; - // Search any predecessors we haven't seen before.
- for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHEndCatch(IntrinsicInst *II) { - // The check in this function makes a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block that isn't post-dominated by the - // end catch block while we are searching the end catch block's predecessors - // for a call to the begin catch intrinsic, then it is possible that it will - // be legal for a path to reach the end catch block without ever having - // called llvm.eh.begincatch. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.endcatch without having previously visited - // a call to llvm.eh.begincatch (mentally adjusting for the fact that in - // reality these calls will be removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - BasicBlock *EndCatchBB = II->getParent(); - - // All paths to the end catch call must pass through a begin catch call. - - // If llvm.eh.begincatch wasn't called in the current block, we'll use this - // lambda to recursively look for it in predecessors. - SmallSet<BasicBlock *, 4> VisitedBlocks; - IntrinsicInst *SecondEndCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. - bool BeginCatchFound = - allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), - &SecondEndCatch, VisitedBlocks); - Assert( - SecondEndCatch == nullptr, - "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", - II, SecondEndCatch); - Assert(BeginCatchFound, - "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", - II); -} - static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC) { // Assume undef could be zero. @@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) { } void Lint::visitExtractElementInst(ExtractElementInst &I) { - if (ConstantInt *CI = dyn_cast<ConstantInt>( - findValue(I.getIndexOperand(), I.getModule()->getDataLayout(), - /*OffsetOk=*/false))) + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), + /*OffsetOk=*/false))) Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), "Undefined result: extractelement index out of range", &I); } void Lint::visitInsertElementInst(InsertElementInst &I) { - if (ConstantInt *CI = dyn_cast<ConstantInt>( - findValue(I.getOperand(2), I.getModule()->getDataLayout(), - /*OffsetOk=*/false))) + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), + /*OffsetOk=*/false))) Assert(CI->getValue().ult(I.getType()->getNumElements()), "Undefined result: insertelement index out of range", &I); } void Lint::visitUnreachableInst(UnreachableInst &I) { // This isn't undefined behavior, it's merely suspicious.
- Assert(&I == I.getParent()->begin() || - std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), + Assert(&I == &I.getParent()->front() || + std::prev(I.getIterator())->mayHaveSideEffects(), "Unusual: unreachable immediately preceded by instruction without " "side effects", &I); @@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) { /// Most analysis passes don't require this logic, because instcombine /// will simplify most of these kinds of things away. But it's a goal of /// this Lint pass to be useful even on non-optimized IR. -Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const { +Value *Lint::findValue(Value *V, bool OffsetOk) const { SmallPtrSet<Value *, 4> Visited; - return findValueImpl(V, DL, OffsetOk, Visited); + return findValueImpl(V, OffsetOk, Visited); } /// findValueImpl - Implementation helper for findValue. -Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, +Value *Lint::findValueImpl(Value *V, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const { // Detect self-referential values. if (!Visited.insert(V).second) @@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, // TODO: Look through eliminable cast pairs. // TODO: Look through calls with unique return values. // TODO: Look through vector insert/extract/shuffle. - V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts(); + V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts(); if (LoadInst *L = dyn_cast<LoadInst>(V)) { - BasicBlock::iterator BBI = L; + BasicBlock::iterator BBI = L->getIterator(); BasicBlock *BB = L->getParent(); SmallPtrSet<BasicBlock *, 4> VisitedBlocks; for (;;) { if (!VisitedBlocks.insert(BB).second) break; - if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), - BB, BBI, 6, AA)) - return findValueImpl(U, DL, OffsetOk, Visited); + if (Value *U = + FindAvailableLoadedValue(L->getPointerOperand(), + BB, BBI, DefMaxInstsToScan, AA)) + return findValueImpl(U, OffsetOk, Visited); if (BBI != BB->begin()) break; BB = BB->getUniquePredecessor(); if (!BB) break; @@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { if (Value *W = PN->hasConstantValue()) if (W != V) - return findValueImpl(W, DL, OffsetOk, Visited); + return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { - if (CI->isNoopCast(DL)) - return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited); + if (CI->isNoopCast(*DL)) + return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) - return findValueImpl(W, DL, OffsetOk, Visited); + return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { // Same as above, but for ConstantExpr instead of Instruction. 
if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - DL.getIntPtrType(V->getType()))) - return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited); + DL->getIntPtrType(V->getType()))) + return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) if (W != V) - return findValueImpl(W, DL, OffsetOk, Visited); + return findValueImpl(W, OffsetOk, Visited); } } // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) - return findValueImpl(W, DL, OffsetOk, Visited); + if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) + return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) + if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI)) if (W != V) - return findValueImpl(W, DL, OffsetOk, Visited); + return findValueImpl(W, OffsetOk, Visited); } return V; diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 624c5a1..4b2fa3c 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // from/to. If so, the previous load or store would have already trapped, // so there is no harm doing an extra load (also, CSE will later eliminate // the load entirely). - BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + BasicBlock::iterator BBI = ScanFrom->getIterator(), + E = ScanFrom->getParent()->begin(); // We can at least always strip pointer casts even though we can't use the // base here. @@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, return false; } +/// DefMaxInstsToScan - the default number of maximum instructions +/// to scan in the block, used by FindAvailableLoadedValue(). +/// FindAvailableLoadedValue() was introduced in r60148, to improve jump +/// threading in part by eliminating partially redundant loads. +/// At that point, the value of MaxInstsToScan was already set to '6' +/// without documented explanation. +cl::opt<unsigned> +llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden, + cl::desc("Use this to specify the default maximum number of instructions " + "to scan backward from a given instruction, when searching for " + "available loaded value")); + /// \brief Scan the ScanBB block backwards to see if we have the value at the /// memory address *Ptr locally available within a small number of instructions. /// @@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, while (ScanFrom != ScanBB->begin()) { // We must ignore debug info directives when counting (otherwise they // would affect codegen). - Instruction *Inst = --ScanFrom; + Instruction *Inst = &*--ScanFrom; if (isa<DbgInfoIntrinsic>(Inst)) continue; @@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. 
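Two asides on the hunk above and the one that resumes below. First, the new cl::opt means the backward-scan budget of FindAvailableLoadedValue can now be tuned from the command line (e.g. passing -available-load-scan-limit=32 to opt) instead of being hard-wired to 6. Second, the store filter below uses the ModRefInfo bit lattice this update introduces; a minimal sketch of that idiom, with enumerator values as in the refactored AliasAnalysis.h:

// MRI_Mod is the "may write the location" bit; masking it out of the
// queried result and comparing with 0 asks "is this instruction proven not
// to modify the loaded address?".
enum ModRefInfo {
  MRI_NoModRef = 0,
  MRI_Ref = 1,
  MRI_Mod = 2,
  MRI_ModRef = MRI_Ref | MRI_Mod
};

static bool provablyDoesNotModify(ModRefInfo MRI) {
  return (MRI & MRI_Mod) == 0;
}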
- if (AA && - (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & - AliasAnalysis::Mod) == 0) + if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0) continue; // Otherwise the store may or may not alias the pointer; bail out. @@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If alias analysis claims that it really won't modify the load, // ignore it. if (AA && - (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & - AliasAnalysis::Mod) == 0) + (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0) continue; // May modify the pointer, bail out. diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index becbae4..d7896ad 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold( /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect interesting dependences up to this threshold. -static cl::opt<unsigned> MaxInterestingDependence( - "max-interesting-dependences", cl::Hidden, - cl::desc("Maximum number of interesting dependences collected by " - "loop-access analysis (default = 100)"), - cl::init(100)); +/// \brief We collect dependences up to this threshold. +static cl::opt<unsigned> + MaxDependences("max-dependences", cl::Hidden, + cl::desc("Maximum number of dependences collected by " + "loop-access analysis (default = 100)"), + cl::init(100)); bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) { return V; } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr) { - - const SCEV *OrigSCEV = SE->getSCEV(Ptr); + const SCEV *OrigSCEV = PSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. @@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; - const SCEV *ByOne = - SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + ScalarEvolution *SE = PSE.getSE(); + const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); + const auto *CT = + static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType())); + + PSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PSE.getSCEV(Ptr); + + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); - return ByOne; + return Expr; } // Otherwise, just return the SCEV of the original pointer. - return SE->getSCEV(Ptr); + return OrigSCEV; } void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, - const ValueToValueMap &Strides) { + const ValueToValueMap &Strides, + PredicatedScalarEvolution &PSE) { // Get the stride replaced scev.
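A condensed sketch (function name illustrative, signatures as used in this tree) of the new predicated-SCEV sequence above: instead of textually rewriting the stride to 1, the analysis records a "stride == 1" equality predicate and re-queries the pointer SCEV under that assumption; a transform consuming the result must later emit a runtime test for the accumulated predicate.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

static const SCEV *assumeUnitStride(PredicatedScalarEvolution &PSE,
                                    Value *StrideVal, Value *Ptr) {
  ScalarEvolution *SE = PSE.getSE();
  // The symbolic stride is an opaque SCEVUnknown; assert it equals 1.
  auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  auto *One = cast<SCEVConstant>(SE->getOne(StrideVal->getType()));
  PSE.addPredicate(*SE->getEqualPredicate(U, One));
  // Re-query: the returned SCEV is only valid under the recorded predicate.
  return PSE.getSCEV(Ptr);
}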
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); + ScalarEvolution *SE = PSE.getSE(); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + + const SCEV *ScStart = AR->getStart(); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId, - Sc); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // For expressions with negative step, the upper bound is ScStart and the + // lower bound is ScEnd. + if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) { + if (CStep->getValue()->isNegative()) + std::swap(ScStart, ScEnd); + } else { + // Fallback case: the step is not constant, but we can still + // get the upper and lower bounds of the interval by using min/max + // expressions. + ScStart = SE->getUMinExpr(ScStart, ScEnd); + ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); + } + + Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); +} + +SmallVector<RuntimePointerChecking::PointerCheck, 4> +RuntimePointerChecking::generateChecks() const { + SmallVector<PointerCheck, 4> Checks; + + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { + const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; + const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + + if (needsChecking(CGI, CGJ)) + Checks.push_back(std::make_pair(&CGI, &CGJ)); + } + } + return Checks; +} + +void RuntimePointerChecking::generateChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + assert(Checks.empty() && "Checks is not empty"); + groupChecks(DepCands, UseDependencies); + Checks = generateChecks(); } -bool RuntimePointerChecking::needsChecking( - const CheckingPtrGroup &M, const CheckingPtrGroup &N, - const SmallVectorImpl<int> *PtrPartition) const { +bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) - if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + if (needsChecking(M.Members[I], N.Members[J])) return true; return false; } @@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks( CheckingGroups.clear(); + // If we need to check two pointers to the same underlying object + // with a non-constant difference, we shouldn't perform any pointer + // grouping with those pointers. This is because we can easily get + // into cases where the resulting check would return false, even when + // the accesses are safe. + // + // The following example shows this: + // for (i = 0; i < 1000; ++i) + // a[5000 + i * m] = a[i] + a[i + 9000] + // + // Here grouping gives a check of (5000, 5000 + 1000 * m) against + // (0, 10000) which is always false. However, if m is 1, there is no + // dependence. Not grouping the checks for a[i] and a[i + 9000] allows + // us to perform an accurate check in this case. + // + // The above case requires that we have an UnknownDependence between + // accesses to the same underlying object. This cannot happen unless + // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies + // is also false. In this case we will use the fallback path and create + // separate checking groups for all pointers.
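Working the comment's example with m == 1: the write covers elements [5000, 6000) while the reads cover [0, 1000) and [9000, 10000). A tiny self-contained illustration of why merging the two read ranges defeats the check:

#include <algorithm>
#include <cassert>

// Two half-open element ranges conflict iff they overlap.
static bool overlaps(long LoA, long HiA, long LoB, long HiB) {
  return std::max(LoA, LoB) < std::min(HiA, HiB);
}

int main() {
  assert(!overlaps(5000, 6000, 0, 1000));     // write vs. a[i]: safe
  assert(!overlaps(5000, 6000, 9000, 10000)); // write vs. a[i + 9000]: safe
  // The merged read group [0, 10000) overlaps the write range, so a grouped
  // check reports a conflict even though the accesses are actually safe.
  assert(overlaps(5000, 6000, 0, 10000));
}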
+ // If we don't have the dependency partitions, construct a new - // checking pointer group for each pointer. + // checking pointer group for each pointer. This is also required + // for correctness, because in this case we can have checking between + // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) CheckingGroups.push_back(CheckingPtrGroup(I, *this)); @@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks( // don't process them twice. SmallSet<unsigned, 2> Seen; - // Go through all equivalence classes, get the the "pointer check groups" + // Go through all equivalence classes, get the "pointer check groups" // and add them to the overall solution. We use the order in which accesses // appear in 'Pointers' to enforce determinism. for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks( } } -bool RuntimePointerChecking::needsChecking( - unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( + const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1, + unsigned PtrIdx2) { + return (PtrToPartition[PtrIdx1] != -1 && + PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { const PointerInfo &PointerI = Pointers[I]; const PointerInfo &PointerJ = Pointers[J]; @@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking( if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; - // If PtrPartition is set omit checks between pointers of the same partition. - // Partition number -1 means that the pointer is used in multiple partitions. - // In this case we can't omit the check. 
- if (PtrPartition && (*PtrPartition)[I] != -1 && - (*PtrPartition)[I] == (*PtrPartition)[J]) - return false; - return true; } -void RuntimePointerChecking::print( - raw_ostream &OS, unsigned Depth, - const SmallVectorImpl<int> *PtrPartition) const { - - OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( + raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + unsigned Depth) const { unsigned N = 0; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) - for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { - OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; - - for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" - << "\n"; - } + for (const auto &Check : Checks) { + const auto &First = Check.first->Members, &Second = Check.second->Members; - OS.indent(Depth + 2) << "Against group " << J << ":\n"; + OS.indent(Depth) << "Check " << N++ << ":\n"; - for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" - << "\n"; - } - } + OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; + for (unsigned K = 0; K < First.size(); ++K) + OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; - OS.indent(Depth) << "Grouped accesses:\n"; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) { - OS.indent(Depth + 2) << "Group " << I << ":\n"; - OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low - << " High: " << *CheckingGroups[I].High << ")\n"; - for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { - OS.indent(Depth + 6) << "Member: " - << *Pointers[CheckingGroups[I].Members[J]].Expr - << "\n"; - } + OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; + for (unsigned K = 0; K < Second.size(); ++K) + OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } -unsigned RuntimePointerChecking::getNumberOfChecks( - const SmallVectorImpl<int> *PtrPartition) const { - - unsigned NumPartitions = CheckingGroups.size(); - unsigned CheckCount = 0; +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { - for (unsigned I = 0; I < NumPartitions; ++I) - for (unsigned J = I + 1; J < NumPartitions; ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) - CheckCount++; - return CheckCount; -} + OS.indent(Depth) << "Run-time memory checks:\n"; + printChecks(OS, Checks, Depth); -bool RuntimePointerChecking::needsAnyChecking( - const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + const auto &CG = CheckingGroups[I]; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) - return true; - return false; + OS.indent(Depth + 2) << "Group " << &CG << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High + << ")\n"; + for (unsigned J = 0; J < CG.Members.size(); ++J) { + OS.indent(Depth + 
6) << "Member: " << *Pointers[CG.Members[J]].Expr + << "\n"; + } + } } namespace { @@ -390,9 +424,10 @@ public: typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false) {} + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -435,7 +470,7 @@ public: /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); - DepChecker.clearInterestingDependences(); + DepChecker.clearDependences(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -477,14 +512,18 @@ private: /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared /// while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; + + /// The SCEV predicate containing all the SCEV-related assumptions. + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, - const ValueToValueMap &Strides, Value *Ptr) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, + const ValueToValueMap &Strides, Value *Ptr, + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); if (!AR) return false; @@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } if (NeedRTCheck && CanDoRT) - RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + RtCheck.generateChecks(DepCands, IsDepCheckNeeded); - DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n"); RtCheck.Need = NeedRTCheck; @@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() { GetUnderlyingObjects(Ptr, TempObjects, DL, LI); DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (Value *UnderlyingObj : TempObjects) { + // nullptr never alias, don't join sets for pointer that have "null" + // in their UnderlyingObjects list. 
+ if (isa<ConstantPointerNull>(UnderlyingObj)) + continue; + UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(UnderlyingObj); if (Prev != ObjToLastAccess.end()) @@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { - const Type *Ty = Ptr->getType(); +int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap) { + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. - const PointerType *PtrTy = cast<PointerType>(Ty); + auto *PtrTy = cast<PointerType>(Ty); if (PtrTy->getElementType()->isAggregateType()) { DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr << "\n"); return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); if (!AR) { @@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); + bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *PtrScev << "\n"); + << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // Check the step is constant. - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); @@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, auto &DL = Lp->getHeader()->getModule()->getDataLayout(); int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); - const APInt &APStepVal = C->getValue()->getValue(); + const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. 
if (APStepVal.getBitWidth() > 64) @@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: + case ForwardButPreventsForwarding: + case Unknown: return false; case BackwardVectorizable: - case Unknown: - case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return true; @@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { - case NoDep: case Forward: case ForwardButPreventsForwarding: - return false; + return true; + case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: - return true; + return false; } llvm_unreachable("unexpected DepType!"); } @@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned TypeByteSize = DL.getTypeAllocSize(ATy); // Negative distances are not plausible dependencies. - const APInt &Val = C->getValue()->getValue(); + const APInt &Val = C->getAPInt(); if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); if (IsTrueDataDependence && @@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Could be improved to assert type sizes are the same (i32 == float, etc). 
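A source-level picture of the direction terminology behind the renamed predicates above (isForward/isBackward classify the sign of the dependence distance Sink - Src):

// Backward dependence of distance 2: what iteration i stores, iteration
// i + 2 loads, so at most two consecutive iterations may run in lockstep.
void backwardDistanceTwo(int *A, int N) {
  for (int i = 0; i + 2 < N; ++i)
    A[i + 2] = A[i] + 1;
}

// Forward dependence: the load of A[i + 2] happens before the store to the
// same element two iterations later, which vectorization can generally
// tolerate (modulo the store-forwarding caveats handled separately).
void forwardDistanceTwo(int *A, int N) {
  for (int i = 0; i + 2 < N; ++i)
    A[i] = A[i + 2] + 1;
}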
if (Val == 0) { if (ATy == BTy) - return Dependence::NoDep; + return Dependence::Forward; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } @@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, isDependent(*A.first, A.second, *B.first, B.second, Strides); SafeForVectorization &= Dependence::isSafeForVectorization(Type); - // Gather dependences unless we accumulated MaxInterestingDependence + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. - if (RecordInterestingDependences) { - if (Dependence::isInterestingDependence(Type)) - InterestingDependences.push_back( - Dependence(A.second, B.second, Type)); - - if (InterestingDependences.size() >= MaxInterestingDependence) { - RecordInterestingDependences = false; - InterestingDependences.clear(); + if (RecordDependences) { + if (Type != Dependence::NoDep) + Dependences.push_back(Dependence(A.second, B.second, Type)); + + if (Dependences.size() >= MaxDependences) { + RecordDependences = false; + Dependences.clear(); DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } - if (!RecordInterestingDependences && !SafeForVectorization) + if (!RecordDependences && !SafeForVectorization) return false; } ++OI; @@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Interesting Dependences: " - << InterestingDependences.size() << "\n"); + DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast<StoreInst>(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(LoopAccessReport(&*it) << "instruction cannot be vectorized"); CanVecMem = false; return; @@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. 
bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( - Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - if (!PtrRtChecking.Need) - return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH<Value> Start; + TrackingVH<Value> End; +}; +} // end anonymous namespace - SmallVector<TrackingVH<Value>, 2> Starts; - SmallVector<TrackingVH<Value>, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, + Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { + Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, TheLoop)) { + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); + return {Ptr, Ptr}; + } else { + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; + + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; + } +} - LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, DL, "induction"); - Instruction *FirstInst = nullptr; +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. 
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds( + const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks, + Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { + SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + std::transform( + PointerChecks.begin(), PointerChecks.end(), + std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerChecking::PointerCheck &Check) { + PointerBounds + First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), + Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - const RuntimePointerChecking::CheckingPtrGroup &CG = - PtrRtChecking.CheckingGroups[i]; - Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; - const SCEV *Sc = SE->getSCEV(Ptr); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); - Starts.push_back(Ptr); - Ends.push_back(Ptr); - } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - Value *Start = nullptr, *End = nullptr; - - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); - Starts.push_back(Start); - Ends.push_back(End); - } - } +std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks) + const { + auto *SE = PSE.getSE(); + SCEVExpander Exp(*SE, DL, "induction"); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); + LLVMContext &Ctx = Loc->getContext(); + Instruction *FirstInst = nullptr; IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
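The loop that follows materializes one overlap test per expanded check with IRBuilder and ORs the results into a single conflict flag. The predicate itself, stated in the comment below, reduces to this scalar sketch (the real code compares SCEV-expanded bounds with unsigned icmps):

#include <cstdint>

// Ranges [StartA, EndA] and [StartB, EndB] may conflict iff
//   StartA <= EndB && StartB <= EndA   (unsigned comparisons).
static bool rangesMayConflict(uint64_t StartA, uint64_t EndA,
                              uint64_t StartB, uint64_t EndB) {
  return StartA <= EndB && StartB <= EndA;
}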
Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { - const RuntimePointerChecking::CheckingPtrGroup &CGI = - PtrRtChecking.CheckingGroups[i]; - const RuntimePointerChecking::CheckingPtrGroup &CGJ = - PtrRtChecking.CheckingGroups[j]; - - if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) - continue; - unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); - unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); - - assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && - (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); - - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); - - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); + + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, - "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; } + MemoryRuntimeCheck = IsConflict; } if (!MemoryRuntimeCheck) @@ -1661,12 +1737,20 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( return std::make_pair(FirstInst, Check); } +std::pair<Instruction *, Instruction *> +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { + if (!PtrRtChecking.Need) + return std::make_pair(nullptr, nullptr); + + return addRuntimeChecks(Loc, 
PtrRtChecking.getChecks()); +} + LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { - OS.indent(Depth) << "Interesting Dependences:\n"; - for (auto &Dep : *InterestingDependences) { + if (auto *Dependences = DepChecker.getDependences()) { + OS.indent(Depth) << "Dependences:\n"; + for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); OS << "\n"; } } else - OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; + OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. PtrRtChecking.print(OS, Depth); @@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Store to invariant address was " << (StoreToLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; + + OS.indent(Depth) << "SCEV assumptions:\n"; + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & @@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, - Strides); + LAI = + llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { } bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis<ScalarEvolution>(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? 
&TLIP->getTLI() : nullptr; - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); @@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { } void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<ScalarEvolution>(); - AU.addRequired<AliasAnalysis>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); @@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 6b6faf8..9ab9eea 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return false; if (I->mayReadFromMemory()) return false; - // The landingpad instruction is immobile. - if (isa<LandingPadInst>(I)) + // EH block instructions are immobile. + if (I->isEHPad()) return false; // Determine the insertion point, unless one was given. if (!InsertPt) { @@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, // Hoist. I->moveBefore(InsertPt); + + // There is possibility of hoisting this instruction above some arbitrary + // condition. Any metadata defined on it can be control dependent on this + // condition. Conservatively strip it here so that we don't give any wrong + // information to the optimizer. + I->dropUnknownNonDebugMetadata(); + Changed = true; return true; } @@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { bool Loop::isLCSSAForm(DominatorTree &DT) const { for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { BasicBlock *BB = *BI; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) { + // Tokens can't be used in PHI nodes and live-out tokens prevent loop + // optimizations, so for the purposes of considered LCSSA form, we + // can ignore them. + if (I->getType()->isTokenTy()) + continue; + for (Use &U : I->uses()) { Instruction *UI = cast<Instruction>(U.getUser()); BasicBlock *UserBB = UI->getParent(); @@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { DT.isReachableFromEntry(UserBB)) return false; } + } } return true; } +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { + if (!isLCSSAForm(DT)) + return false; + + return std::all_of(begin(), end(), [&](const Loop *L) { + return L->isRecursivelyLCSSAForm(DT); + }); +} + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. 
@@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const { if (isa<IndirectBrInst>((*I)->getTerminator())) return false; - if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) + if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) { if (II->cannotDuplicate()) return false; + // Return false if any loop blocks contain invokes to EH-pads other than + // landingpads; we don't know how to split those edges yet. + auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI(); + if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI)) + return false; + } for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { if (const CallInst *CI = dyn_cast<CallInst>(BI)) { if (CI->cannotDuplicate()) return false; } + if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I)) + return false; } } return true; @@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -/// updateUnloop - The last backedge has been removed from a loop--now the -/// "unloop". Find a new parent for the blocks contained within unloop and -/// update the loop tree. We don't necessarily have valid dominators at this -/// point, but LoopInfo is still valid except for the removal of this loop. -/// -/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without -/// checking first is illegal. +LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) { + analyze(DomTree); +} + void LoopInfo::updateUnloop(Loop *Unloop) { + Unloop->markUnlooped(); // First handle the special case of no parent loop to simplify the algorithm. if (!Unloop->getParentLoop()) { @@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) { // objects. I don't want to add that kind of complexity until the scope of // the problem is better understood. LoopInfo LI; - LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F)); + LI.analyze(AM->getResult<DominatorTreeAnalysis>(F)); return LI; } @@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F, return PreservedAnalyses::all(); } +PrintLoopPass::PrintLoopPass() : OS(dbgs()) {} +PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner) + : OS(OS), Banner(Banner) {} + +PreservedAnalyses PrintLoopPass::run(Loop &L) { + OS << Banner; + for (auto *Block : L.blocks()) + if (Block) + Block->print(OS); + else + OS << "Printing <null> block"; + return PreservedAnalyses::all(); +} + //===----------------------------------------------------------------------===// // LoopInfo implementation // @@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information", bool LoopInfoWrapperPass::runOnFunction(Function &) { releaseMemory(); - LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); + LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); return false; } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index e9fcf02..dc42473 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" @@ -27,35 +28,26 @@ namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. 
/// -class PrintLoopPass : public LoopPass { -private: - std::string Banner; - raw_ostream &Out; // raw_ostream to print on. +class PrintLoopPassWrapper : public LoopPass { + PrintLoopPass P; public: static char ID; - PrintLoopPass(const std::string &B, raw_ostream &o) - : LoopPass(ID), Banner(B), Out(o) {} + PrintLoopPassWrapper() : LoopPass(ID) {} + PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner) + : LoopPass(ID), P(OS, Banner) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } bool runOnLoop(Loop *L, LPPassManager &) override { - Out << Banner; - for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); - b != be; - ++b) { - if (*b) - (*b)->print(Out); - else - Out << "Printing <null> block"; - } + P.run(*L); return false; } }; -char PrintLoopPass::ID = 0; +char PrintLoopPassWrapper::ID = 0; } //===----------------------------------------------------------------------===// @@ -66,81 +58,34 @@ char LPPassManager::ID = 0; LPPassManager::LPPassManager() : FunctionPass(ID), PMDataManager() { - skipThisLoop = false; - redoThisLoop = false; LI = nullptr; CurrentLoop = nullptr; } -/// Delete loop from the loop queue and loop hierarchy (LoopInfo). -void LPPassManager::deleteLoopFromQueue(Loop *L) { - - LI->updateUnloop(L); - - // Notify passes that the loop is being deleted. - deleteSimpleAnalysisLoop(L); - - // If L is current loop then skip rest of the passes and let - // runOnFunction remove L from LQ. Otherwise, remove L from LQ now - // and continue applying other passes on CurrentLoop. - if (CurrentLoop == L) - skipThisLoop = true; - - delete L; - - if (skipThisLoop) - return; - - for (std::deque<Loop *>::iterator I = LQ.begin(), - E = LQ.end(); I != E; ++I) { - if (*I == L) { - LQ.erase(I); - break; - } - } -} - // Insert loop into loop nest (LoopInfo) and loop queue (LQ). -void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { - - assert (CurrentLoop != L && "Cannot insert CurrentLoop"); +Loop &LPPassManager::addLoop(Loop *ParentLoop) { + // Create a new loop. LI will take ownership. + Loop *L = new Loop(); - // Insert into loop nest - if (ParentLoop) - ParentLoop->addChildLoop(L); - else + // Insert into the loop nest and the loop queue. + if (!ParentLoop) { + // This is the top level loop. LI->addTopLevelLoop(L); - - insertLoopIntoQueue(L); -} - -void LPPassManager::insertLoopIntoQueue(Loop *L) { - // Insert L into loop queue - if (L == CurrentLoop) - redoLoop(L); - else if (!L->getParentLoop()) - // This is top level loop. LQ.push_front(L); - else { - // Insert L after the parent loop. - for (std::deque<Loop *>::iterator I = LQ.begin(), - E = LQ.end(); I != E; ++I) { - if (*I == L->getParentLoop()) { - // deque does not support insert after. - ++I; - LQ.insert(I, 1, L); - break; - } - } + return *L; } -} -// Reoptimize this loop. LPPassManager will re-insert this loop into the -// queue. This allows LoopPass to change loop nest for the loop. This -// utility may send LPPassManager into infinite loops so use caution. -void LPPassManager::redoLoop(Loop *L) { - assert (CurrentLoop == L && "Can redo only CurrentLoop"); - redoThisLoop = true; + ParentLoop->addChildLoop(L); + // Insert L into the loop queue after the parent loop. + for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) { + if (*I == L->getParentLoop()) { + // deque does not support insert after.
+ ++I; + LQ.insert(I, 1, L); + break; + } + } + return *L; } /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for @@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - CurrentLoop = LQ.back(); - skipThisLoop = false; - redoThisLoop = false; - + CurrentLoop = LQ.back(); // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); @@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) { if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, - skipThisLoop ? "<deleted>" : - CurrentLoop->getHeader()->getName()); + CurrentLoop->isUnloop() + ? "<deleted>" + : CurrentLoop->getHeader()->getName()); dumpPreservedSet(P); - if (!skipThisLoop) { + if (CurrentLoop->isUnloop()) { + // Notify passes that the loop is being deleted. + deleteSimpleAnalysisLoop(CurrentLoop); + } else { // Manually check that this loop is still healthy. This is done // instead of relying on LoopInfo::verifyLoop since LoopInfo // is a function pass and it's really expensive to verify every @@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) { removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, - skipThisLoop ? "<deleted>" : - CurrentLoop->getHeader()->getName(), + removeDeadPasses(P, CurrentLoop->isUnloop() + ? "<deleted>" + : CurrentLoop->getHeader()->getName(), ON_LOOP_MSG); - if (skipThisLoop) + if (CurrentLoop->isUnloop()) // Do not run other passes on this loop. break; } @@ -289,17 +235,16 @@ bool LPPassManager::runOnFunction(Function &F) { // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. - if (skipThisLoop) + if (CurrentLoop->isUnloop()) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); } + delete CurrentLoop; + } // Pop the loop from queue after running all passes. LQ.pop_back(); - - if (redoThisLoop) - LQ.push_back(CurrentLoop); } // Finalization @@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) { Pass *LoopPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { - return new PrintLoopPass(Banner, O); + return new PrintLoopPassWrapper(O, Banner); } // Check if this pass is suitable for the current LPPassManager, if diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index da3b829..078cefe 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -49,7 +49,7 @@ namespace { void print(raw_ostream &OS, const Module * = nullptr) const override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); AU.addRequiredTransitive<MemoryDependenceAnalysis>(); AU.setPreservesAll(); } @@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { // All this code uses non-const interfaces because MemDep is not // const-friendly, though nothing is actually modified. 
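LPPassManager::addLoop above steps the iterator past the parent loop because std::deque has no insert-after operation, only insert-before; the same idiom in isolation, as a standalone sketch with illustrative values:

#include <algorithm>
#include <cassert>
#include <deque>

int main() {
  std::deque<int> Q = {10, 20, 30};
  auto It = std::find(Q.begin(), Q.end(), 20); // locate the "parent"
  ++It;             // step past it, since deque cannot insert after...
  Q.insert(It, 25); // ...and insert before the following element instead
  assert(Q.size() == 4 && Q[2] == 25);
  return 0;
}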
- for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { Instruction *Inst = &I; if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) @@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { } void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { - for (const auto &I : inst_range(*F)) { + for (const auto &I : instructions(*F)) { const Instruction *Inst = &I; DepSetMap::const_iterator DI = Deps.find(Inst); diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp index fa292a2..36f1424 100644 --- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -22,7 +22,8 @@ using namespace llvm; namespace { struct MemDerefPrinter : public FunctionPass { - SmallVector<Value *, 4> Vec; + SmallVector<Value *, 4> Deref; + SmallPtrSet<Value *, 4> DerefAndAligned; static char ID; // Pass identification, replacement for typeid MemDerefPrinter() : FunctionPass(ID) { @@ -34,7 +35,8 @@ namespace { bool runOnFunction(Function &F) override; void print(raw_ostream &OS, const Module * = nullptr) const override; void releaseMemory() override { - Vec.clear(); + Deref.clear(); + DerefAndAligned.clear(); } }; } @@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() { bool MemDerefPrinter::runOnFunction(Function &F) { const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &I: inst_range(F)) { + for (auto &I: instructions(F)) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, DL)) - Vec.push_back(PO); + Deref.push_back(PO); + if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL)) + DerefAndAligned.insert(PO); } } return false; @@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) { void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const { OS << "The following are dereferenceable:\n"; - for (auto &V: Vec) { + for (Value *V: Deref) { V->print(OS); + if (DerefAndAligned.count(V)) + OS << "\t(aligned)"; + else + OS << "\t(unaligned)"; OS << "\n\n"; } } diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 8ddac8f..b19ecad 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define DEBUG_TYPE "memory-builtins" -enum AllocType { +enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null CallocLike = 1<<2, // allocates + bzero @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = { {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long) + {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow) + {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long) + 
{LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow) {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, @@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return nullptr; - unsigned i = 0; - bool found = false; - for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (AllocationFnData[i].Func == TLIFn) { - found = true; - break; - } - } - if (!found) + const AllocFnsTy *FnData = + std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData), + [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; }); + + if (FnData == std::end(AllocationFnData)) return nullptr; - const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) return nullptr; @@ -185,13 +188,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, } /// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); -} - -/// \brief Tests if a value is a call or invoke to a library function that /// allocates memory and never returns null (such as operator new). bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { @@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + TLIFn == LibFunc::ZdaPv || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return nullptr; @@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { // always generate code immediately before the instruction being // processed, so that the generated code dominates the same BBs - Instruction *PrevInsertPoint = 
Builder.GetInsertPoint(); + BuilderTy::InsertPointGuard Guard(Builder); if (Instruction *I = dyn_cast<Instruction>(V)) Builder.SetInsertPoint(I); @@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { Result = unknown(); } - if (PrevInsertPoint) - Builder.SetInsertPoint(PrevInsertPoint); - // Don't reuse CacheIt since it may be invalid at this point. CacheMap[V] = Result; return Result; @@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { // compute offset/size for each PHI incoming pointer for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { - Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt()); SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); if (!bothKnown(EdgeData)) { diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 782a67b..3e80bfe 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,7 +22,9 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -static const unsigned int BlockScanLimit = 100; + +static cl::opt<unsigned> BlockScanLimit( + "memdep-block-scan-limit", cl::Hidden, cl::init(100), + cl::desc("The number of instructions to scan in a block in memory " + "dependency analysis (default = 100)")); // Limit on the number of memdep results to process. static const unsigned int NumResultsLimit = 100; @@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0; INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() { void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } bool MemoryDependenceAnalysis::runOnFunction(Function &F) { - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return false; } @@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction*, /// location, fill in Loc with the details, otherwise set Loc.Ptr to null. 
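The new memdep-block-scan-limit option above bounds how many instructions the dependency walk may inspect before answering conservatively; a standalone sketch of that bounded backward scan, with plain integers standing in for instructions (illustrative, not LLVM's types):

#include <cassert>
#include <cstddef>
#include <vector>

enum class Dep { Unknown, Clobber, NonLocal };

// Walk from ScanIt back toward the block start, spending one unit of Limit
// per instruction and returning Unknown once the budget is exhausted.
static Dep scanBackwards(const std::vector<int> &BB, std::size_t ScanIt,
                         int Clobbering, unsigned Limit) {
  while (ScanIt != 0) {
    if (Limit-- == 0) // out of budget: give the conservative answer
      return Dep::Unknown;
    int Inst = BB[--ScanIt]; // step backwards, then inspect
    if (Inst == Clobbering)
      return Dep::Clobber;
  }
  return Dep::NonLocal; // reached the block entry with no local answer
}

int main() {
  std::vector<int> BB = {1, 2, 3, 4};
  assert(scanBackwards(BB, BB.size(), 2, 100) == Dep::Clobber);
  assert(scanBackwards(BB, BB.size(), 2, 1) == Dep::Unknown);
  return 0;
}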
/// Return a ModRefInfo value describing the general behavior of the /// instruction. -static AliasAnalysis::ModRefResult -GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { +static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, + const TargetLibraryInfo &TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::Ref; + return MRI_Ref; } if (LI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::Mod; + return MRI_Mod; } if (SI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Loc = MemoryLocation::get(V); - return AliasAnalysis::ModRef; + return MRI_ModRef; } - if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure Loc = MemoryLocation(CI->getArgOperand(0)); - return AliasAnalysis::Mod; + return MRI_Mod; } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; case Intrinsic::invariant_end: II->getAAMetadata(AAInfo); Loc = MemoryLocation( @@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; default: break; } @@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) - return AliasAnalysis::ModRef; + return MRI_ModRef; if (Inst->mayReadFromMemory()) - return AliasAnalysis::Ref; - return AliasAnalysis::NoModRef; + return MRI_Ref; + return MRI_NoModRef; } /// getCallSiteDependencyFrom - Private helper for finding the local @@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; // If this inst is a memory op, get the pointer it accessed MemoryLocation Loc; - AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + ModRefInfo MR = GetLocation(Inst, Loc, *TLI); if (Loc.Ptr) { // A simple instruction. - if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef) return MemDepResult::getClobber(Inst); continue; } @@ -224,10 +233,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. 
switch (AA->getModRefInfo(CS, InstCS)) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated. - if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && + if (isReadOnlyCall && !(MR & MRI_Mod) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); @@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // If we could not obtain a pointer for the instruction and the instruction // touches memory then assume that this is a dependency. - if (MR != AliasAnalysis::NoModRef) + if (MR != MRI_NoModRef) return MemDepResult::getClobber(Inst); } @@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { + if (QueryInst != nullptr) { + if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { + MemDepResult invariantGroupDependency = + getInvariantGroupPointerDependency(LI, BB); + + if (invariantGroupDependency.isDef()) + return invariantGroupDependency; + } + } + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult +MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB) { + Value *LoadOperand = LI->getPointerOperand(); + // It is not safe to walk the use list of a global value, because function + // passes aren't allowed to look outside their functions. + if (isa<GlobalValue>(LoadOperand)) + return MemDepResult::getUnknown(); + + auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); + if (!InvariantGroupMD) + return MemDepResult::getUnknown(); + + MemDepResult Result = MemDepResult::getUnknown(); + llvm::SmallSet<Value *, 14> Seen; + // Queue to process all pointers that are equivalent to load operand. + llvm::SmallVector<Value *, 8> LoadOperandsQueue; + LoadOperandsQueue.push_back(LoadOperand); + while (!LoadOperandsQueue.empty()) { + Value *Ptr = LoadOperandsQueue.pop_back_val(); + if (isa<GlobalValue>(Ptr)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { + if (!Seen.count(BCI->getOperand(0))) { + LoadOperandsQueue.push_back(BCI->getOperand(0)); + Seen.insert(BCI->getOperand(0)); + } + } + + for (Use &Us : Ptr->uses()) { + auto *U = dyn_cast<Instruction>(Us.getUser()); + if (!U || U == LI || !DT->dominates(U, LI)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(U)) { + if (!Seen.count(BCI)) { + LoadOperandsQueue.push_back(BCI); + Seen.insert(BCI); + } + continue; + } + // If we hit a load/store with the same invariant.group metadata (and the + // same pointer operand) we can assume that the value pointed to by the + // pointer operand didn't change. + if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && + U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + return MemDepResult::getDef(U); + } + } + return Result; +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst) { + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; @@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const DataLayout &DL = BB->getModule()->getDataLayout(); + // Create a numbered basic block to lazily compute and cache instruction + // positions inside a BB.
This is used to provide fast queries for relative + // position between two instructions in a BB and can be used by + // AliasAnalysis::callCapturesBefore. + OrderedBasicBlock OBB(BB); + // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) // Debug intrinsics don't (and can't) cause dependencies. @@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) + if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef) continue; // Ok, this store might clobber the query pointer. Check to see if it is @@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL); @@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( continue; // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. - if (MR == AliasAnalysis::ModRef) - MR = AA->callCapturesBefore(Inst, MemLoc, DT); + if (MR == MRI_ModRef) + MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB); switch (MR) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; - case AliasAnalysis::Mod: + case MRI_Mod: return MemDepResult::getClobber(Inst); - case AliasAnalysis::Ref: + case MRI_Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) @@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getNonFuncLocal(); } else { MemoryLocation MemLoc; - AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI); if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. - bool isLoad = !(MR & AliasAnalysis::Mod); + bool isLoad = !(MR & MRI_Mod); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; - LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent, QueryInst); + LocalCache = getPointerDependencyFrom( + MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst); } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + LocalCache = getCallSiteDependencyFrom( + QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent); } else // Non-memory instruction. 
LocalCache = MemDepResult::getUnknown(); @@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { BasicBlock::iterator ScanPos = DirtyBB->end(); if (ExistingResult) { if (Instruction *Inst = ExistingResult->getResult().getInst()) { - ScanPos = Inst; + ScanPos = Inst->getIterator(); // We're removing QueryInst's use of Inst. RemoveFromReverseMap(ReverseNonLocalDeps, Inst, QueryCS.getInstruction()); @@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock( assert(ExistingResult->getResult().getInst()->getParent() == BB && "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; - ScanPos = ExistingResult->getResult().getInst(); + ScanPos = ExistingResult->getResult().getInst()->getIterator(); // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); - RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } @@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // the entire block to get to this point. MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) - NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator()); ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { @@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); - AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); } /// verifyRemoved - Verify that the specified instruction does not occur diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp deleted file mode 100644 index 322a9a8..0000000 --- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the default implementation of the Alias Analysis interface -// that simply returns "I don't know" for all queries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - /// NoAA - This class implements the -no-aa pass, which always returns "I - /// don't know" for alias queries. NoAA is unlike other alias analysis - /// implementations, in that it does not chain to a previous analysis. As - /// such it doesn't follow many of the rules that other alias analyses must. 
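getInvariantGroupPointerDependency above chases bitcast-equivalent pointers with a worklist plus a Seen set, so cyclic use chains still terminate and nothing is visited twice; the bare pattern, as a standalone sketch over an illustrative node type:

#include <cassert>
#include <set>
#include <vector>

struct Node { std::vector<const Node *> Edges; };

// Count every node reachable from Root exactly once, even on cyclic graphs:
// the Seen set filters duplicates before they ever enter the worklist.
static int countReachable(const Node *Root) {
  std::set<const Node *> Seen = {Root};
  std::vector<const Node *> Worklist = {Root};
  int N = 0;
  while (!Worklist.empty()) {
    const Node *Cur = Worklist.back();
    Worklist.pop_back();
    ++N;
    for (const Node *Next : Cur->Edges)
      if (Seen.insert(Next).second) // true only on first sighting
        Worklist.push_back(Next);
  }
  return N;
}

int main() {
  Node A, B, C;
  A.Edges = {&B, &C};
  B.Edges = {&A, &C}; // cycle back to A: still counted exactly once
  assert(countReachable(&A) == 3);
  return 0;
}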
- /// - struct NoAA : public ImmutablePass, public AliasAnalysis { - static char ID; // Class identification, replacement for typeinfo - NoAA() : ImmutablePass(ID) { - initializeNoAAPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override {} - - bool doInitialization(Module &M) override { - // Note: NoAA does not call InitializeAliasAnalysis because it's - // special and does not support chaining. - DL = &M.getDataLayout(); - return true; - } - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - return MayAlias; - } - - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - return UnknownModRefBehavior; - } - ModRefBehavior getModRefBehavior(const Function *F) override { - return UnknownModRefBehavior; - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return false; - } - ModRefResult getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) override { - return ModRef; - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - return ModRef; - } - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return ModRef; - } - - void deleteValue(Value *V) override {} - void addEscapingUse(Use &U) override {} - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - }; -} // End of anonymous namespace - -// Register this pass... -char NoAA::ID = 0; -INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", - "No Alias Analysis (always returns 'may' alias)", - true, true, true) - -ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp new file mode 100644 index 0000000..25f660f --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -0,0 +1,170 @@ +//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +/// TODO: Theoretically we could check for dependencies between objc_* calls +/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" + +#define DEBUG_TYPE "objc-arc-aa" + +using namespace llvm; +using namespace llvm::objcarc; + +AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + if (!EnableARCOpts) + return AAResultBase::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. + const Value *SA = GetRCIdentityRoot(LocA.Ptr); + const Value *SB = GetRCIdentityRoot(LocB.Ptr); + AliasResult Result = + AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), + MemoryLocation(SB, LocB.Size, LocB.AATags)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA, DL); + const Value *UB = GetUnderlyingObjCPtr(SB, DL); + if (UA != SA || UB != SB) { + Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return MayAlias; +} + +bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = GetRCIdentityRoot(Loc.Ptr); + if (AAResultBase::pointsToConstantMemory( + MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S, DL); + if (U != S) + return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AAResultBase::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case ARCInstKind::NoopCast: + return FMRB_DoesNotAccessMemory; + default: + break; + } + + return AAResultBase::getModRefBehavior(F); +} + +ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + if (!EnableARCOpts) + return AAResultBase::getModRefInfo(CS, Loc); + + switch (GetBasicARCInstKind(CS.getInstruction())) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::NoopCast: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. 
+ // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. + return MRI_NoModRef; + default: + break; + } + + return AAResultBase::getModRefInfo(CS, Loc); +} + +ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) { + return ObjCARCAAResult(F.getParent()->getDataLayout(), + AM->getResult<TargetLibraryAnalysis>(F)); +} + +char ObjCARCAA::PassID; + +char ObjCARCAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) + +ImmutablePass *llvm::createObjCARCAAWrapperPass() { + return new ObjCARCAAWrapperPass(); +} + +ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) { + initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ObjCARCAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ObjCARCAAResult( + M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; +} + +bool ObjCARCAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp new file mode 100644 index 0000000..e3e74aa --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -0,0 +1,28 @@ +//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// \brief A handy option to enable/disable all ARC Optimizations. +bool llvm::objcarc::EnableARCOpts; +static cl::opt<bool, true> +EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), + cl::location(EnableARCOpts), + cl::init(true)); diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp new file mode 100644 index 0000000..133b635 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp @@ -0,0 +1,675 @@ +//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines several utility functions used by various ARC +/// optimizations which are IMHO too big to be in a header file. +/// +/// WARNING: This file knows about certain library functions. 
It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; +using namespace llvm::objcarc; + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, + const ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Retain: + return OS << "ARCInstKind::Retain"; + case ARCInstKind::RetainRV: + return OS << "ARCInstKind::RetainRV"; + case ARCInstKind::RetainBlock: + return OS << "ARCInstKind::RetainBlock"; + case ARCInstKind::Release: + return OS << "ARCInstKind::Release"; + case ARCInstKind::Autorelease: + return OS << "ARCInstKind::Autorelease"; + case ARCInstKind::AutoreleaseRV: + return OS << "ARCInstKind::AutoreleaseRV"; + case ARCInstKind::AutoreleasepoolPush: + return OS << "ARCInstKind::AutoreleasepoolPush"; + case ARCInstKind::AutoreleasepoolPop: + return OS << "ARCInstKind::AutoreleasepoolPop"; + case ARCInstKind::NoopCast: + return OS << "ARCInstKind::NoopCast"; + case ARCInstKind::FusedRetainAutorelease: + return OS << "ARCInstKind::FusedRetainAutorelease"; + case ARCInstKind::FusedRetainAutoreleaseRV: + return OS << "ARCInstKind::FusedRetainAutoreleaseRV"; + case ARCInstKind::LoadWeakRetained: + return OS << "ARCInstKind::LoadWeakRetained"; + case ARCInstKind::StoreWeak: + return OS << "ARCInstKind::StoreWeak"; + case ARCInstKind::InitWeak: + return OS << "ARCInstKind::InitWeak"; + case ARCInstKind::LoadWeak: + return OS << "ARCInstKind::LoadWeak"; + case ARCInstKind::MoveWeak: + return OS << "ARCInstKind::MoveWeak"; + case ARCInstKind::CopyWeak: + return OS << "ARCInstKind::CopyWeak"; + case ARCInstKind::DestroyWeak: + return OS << "ARCInstKind::DestroyWeak"; + case ARCInstKind::StoreStrong: + return OS << "ARCInstKind::StoreStrong"; + case ARCInstKind::CallOrUser: + return OS << "ARCInstKind::CallOrUser"; + case ARCInstKind::Call: + return OS << "ARCInstKind::Call"; + case ARCInstKind::User: + return OS << "ARCInstKind::User"; + case ARCInstKind::IntrinsicUser: + return OS << "ARCInstKind::IntrinsicUser"; + case ARCInstKind::None: + return OS << "ARCInstKind::None"; + } + llvm_unreachable("Unknown instruction class!"); +} + +ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { + Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + + // No (mandatory) arguments. + if (AI == AE) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush) + .Case("clang.arc.use", ARCInstKind::IntrinsicUser) + .Default(ARCInstKind::CallOrUser); + + // One argument. + const Argument *A0 = &*AI++; + if (AI == AE) + // Argument is a pointer. + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { + Type *ETy = PTy->getElementType(); + // Argument is i8*. 
+ if (ETy->isIntegerTy(8)) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_retain", ARCInstKind::Retain) + .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV) + .Case("objc_retainBlock", ARCInstKind::RetainBlock) + .Case("objc_release", ARCInstKind::Release) + .Case("objc_autorelease", ARCInstKind::Autorelease) + .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV) + .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop) + .Case("objc_retainedObject", ARCInstKind::NoopCast) + .Case("objc_unretainedObject", ARCInstKind::NoopCast) + .Case("objc_unretainedPointer", ARCInstKind::NoopCast) + .Case("objc_retain_autorelease", + ARCInstKind::FusedRetainAutorelease) + .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue", + ARCInstKind::FusedRetainAutoreleaseRV) + .Case("objc_sync_enter", ARCInstKind::User) + .Case("objc_sync_exit", ARCInstKind::User) + .Default(ARCInstKind::CallOrUser); + + // Argument is i8** + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained) + .Case("objc_loadWeak", ARCInstKind::LoadWeak) + .Case("objc_destroyWeak", ARCInstKind::DestroyWeak) + .Default(ARCInstKind::CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = &*AI++; + if (AI == AE) + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_storeWeak", ARCInstKind::StoreWeak) + .Case("objc_initWeak", ARCInstKind::InitWeak) + .Case("objc_storeStrong", ARCInstKind::StoreStrong) + .Default(ARCInstKind::CallOrUser); + // Second argument is i8**. + if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch<ARCInstKind>(F->getName()) + .Case("objc_moveWeak", ARCInstKind::MoveWeak) + .Case("objc_copyWeak", ARCInstKind::CopyWeak) + // Ignore annotation calls. This is important to stop the + // optimizer from treating annotations as uses which would + // make the state of the pointers they are attempting to + // elucidate to be incorrect. + .Case("llvm.arc.annotation.topdown.bbstart", + ARCInstKind::None) + .Case("llvm.arc.annotation.topdown.bbend", + ARCInstKind::None) + .Case("llvm.arc.annotation.bottomup.bbstart", + ARCInstKind::None) + .Case("llvm.arc.annotation.bottomup.bbend", + ARCInstKind::None) + .Default(ARCInstKind::CallOrUser); + } + + // Anything else. + return ARCInstKind::CallOrUser; +} + +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isInertIntrinsic(unsigned ID) { + // TODO: Make this into a covered switch. 
+ switch (ID) { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::stacksave: + case Intrinsic::stackrestore: + case Intrinsic::vastart: + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::objectsize: + case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: + case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: + case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: + case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return true; + default: + return false; + } +} + +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isUseOnlyIntrinsic(unsigned ID) { + // We are conservative and even though intrinsics are unlikely to touch + // reference counts, we whitelist them for safety. + // + // TODO: Expand this into a covered switch. There is a lot more here. + switch (ID) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + return true; + default: + return false; + } +} + +/// \brief Determine what kind of construct V is. +ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Any instruction other than bitcast and gep with a pointer operand has a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast<CallInst>(I); + // See if we have a function that we know something about. + if (const Function *F = CI->getCalledFunction()) { + ARCInstKind Class = GetFunctionClass(F); + if (Class != ARCInstKind::CallOrUser) + return Class; + Intrinsic::ID ID = F->getIntrinsicID(); + if (isInertIntrinsic(ID)) + return ARCInstKind::None; + if (isUseOnlyIntrinsic(ID)) + return ARCInstKind::User; + } + + // Otherwise, be conservative. + return GetCallSiteClass(CI); + } + case Instruction::Invoke: + // Otherwise, be conservative.
+ return GetCallSiteClass(cast<InvokeInst>(I)); + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: + case Instruction::PHI: + case Instruction::Ret: + case Instruction::Br: + case Instruction::Switch: + case Instruction::IndirectBr: + case Instruction::Alloca: + case Instruction::VAArg: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::FDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::Trunc: + case Instruction::IntToPtr: + case Instruction::FCmp: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::InsertElement: + case Instruction::ExtractElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + break; + case Instruction::ICmp: + // Comparing a pointer with null, or any other constant, isn't an + // interesting use, because we don't care what the pointer points to, or + // about the values of any other dynamic reference-counted pointers. + if (IsPotentialRetainableObjPtr(I->getOperand(1))) + return ARCInstKind::User; + break; + default: + // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. + for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (IsPotentialRetainableObjPtr(*OI)) + return ARCInstKind::User; + } + } + + // Otherwise, it's totally inert for ARC purposes. + return ARCInstKind::None; +} + +/// \brief Test if the given class is a kind of user. +bool llvm::objcarc::IsUser(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::User: + case ARCInstKind::CallOrUser: + case ARCInstKind::IntrinsicUser: + return true; + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::RetainBlock: + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::NoopCast: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::Call: + case ARCInstKind::None: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class is objc_retain or equivalent. +bool llvm::objcarc::IsRetain(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + return true; + // I believe we treat retain block as not a retain since it can copy its + // block. 
+ case ARCInstKind::RetainBlock: + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::NoopCast: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class is objc_autorelease or equivalent. +bool llvm::objcarc::IsAutorelease(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + return true; + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::RetainBlock: + case ARCInstKind::Release: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::NoopCast: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. +bool llvm::objcarc::IsForwarding(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::NoopCast: + return true; + case ARCInstKind::RetainBlock: + case ARCInstKind::Release: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. 
+bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::RetainBlock: + return true; + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + case ARCInstKind::NoopCast: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) { + // ARCInstKind::RetainBlock may be given a stack argument. + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::AutoreleaseRV: + return true; + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::RetainBlock: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + case ARCInstKind::NoopCast: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +bool llvm::objcarc::IsNeverTail(ARCInstKind Class) { + /// It is never safe to tail call objc_autorelease: tail calling it enables + /// fast autoreleasing, which can cause the object to be reclaimed from the + /// autorelease pool, violating the semantics of __autoreleasing types in ARC.
+ switch (Class) { + case ARCInstKind::Autorelease: + return true; + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::Release: + case ARCInstKind::RetainBlock: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + case ARCInstKind::NoopCast: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +bool llvm::objcarc::IsNoThrow(ARCInstKind Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + return true; + case ARCInstKind::RetainBlock: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + case ARCInstKind::NoopCast: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. +/// +/// This means that it *could* interrupt the RV optimization. 
+bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
+  switch (Class) {
+  case ARCInstKind::AutoreleasepoolPop:
+  case ARCInstKind::CallOrUser:
+  case ARCInstKind::Call:
+  case ARCInstKind::Autorelease:
+  case ARCInstKind::AutoreleaseRV:
+  case ARCInstKind::FusedRetainAutorelease:
+  case ARCInstKind::FusedRetainAutoreleaseRV:
+    return true;
+  case ARCInstKind::Retain:
+  case ARCInstKind::RetainRV:
+  case ARCInstKind::Release:
+  case ARCInstKind::AutoreleasepoolPush:
+  case ARCInstKind::RetainBlock:
+  case ARCInstKind::LoadWeakRetained:
+  case ARCInstKind::StoreWeak:
+  case ARCInstKind::InitWeak:
+  case ARCInstKind::LoadWeak:
+  case ARCInstKind::MoveWeak:
+  case ARCInstKind::CopyWeak:
+  case ARCInstKind::DestroyWeak:
+  case ARCInstKind::StoreStrong:
+  case ARCInstKind::IntrinsicUser:
+  case ARCInstKind::User:
+  case ARCInstKind::None:
+  case ARCInstKind::NoopCast:
+    return false;
+  }
+  llvm_unreachable("covered switch isn't covered?");
+}
+
+bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
+  switch (Kind) {
+  case ARCInstKind::Retain:
+  case ARCInstKind::RetainRV:
+  case ARCInstKind::Autorelease:
+  case ARCInstKind::AutoreleaseRV:
+  case ARCInstKind::NoopCast:
+  case ARCInstKind::FusedRetainAutorelease:
+  case ARCInstKind::FusedRetainAutoreleaseRV:
+  case ARCInstKind::IntrinsicUser:
+  case ARCInstKind::User:
+  case ARCInstKind::None:
+    return false;
+
+  // The cases below are conservative.
+
+  // RetainBlock can result in user-defined copy constructors being called,
+  // implying that releases may occur.
+  case ARCInstKind::RetainBlock:
+  case ARCInstKind::Release:
+  case ARCInstKind::AutoreleasepoolPush:
+  case ARCInstKind::AutoreleasepoolPop:
+  case ARCInstKind::LoadWeakRetained:
+  case ARCInstKind::StoreWeak:
+  case ARCInstKind::InitWeak:
+  case ARCInstKind::LoadWeak:
+  case ARCInstKind::MoveWeak:
+  case ARCInstKind::CopyWeak:
+  case ARCInstKind::DestroyWeak:
+  case ARCInstKind::StoreStrong:
+  case ARCInstKind::CallOrUser:
+  case ARCInstKind::Call:
+    return true;
+  }
+
+  llvm_unreachable("covered switch isn't covered?");
+}
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
new file mode 100644
index 0000000..0f0016f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -0,0 +1,85 @@
+//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the OrderedBasicBlock class. OrderedBasicBlock
+// maintains an interface where clients can query if one instruction comes
+// before another in a BasicBlock. Since BasicBlock currently lacks a reliable
+// way to query the relative position of instructions, one can use
+// OrderedBasicBlock for such queries. OrderedBasicBlock is lazily built on a
+// source BasicBlock and maintains an internal Instruction -> Position map. An
+// OrderedBasicBlock instance should be discarded whenever the source
+// BasicBlock changes.
+//
+// It's currently used by the CaptureTracker in order to find the relative
+// positions of a pair of instructions inside a BasicBlock.
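+//
+// A minimal usage sketch (hypothetical client code; I1 and I2 stand for two
+// instructions already known to be in the same block BB):
+//
+//   OrderedBasicBlock OBB(BB);
+//   if (OBB.dominates(I1, I2)) {
+//     // I1 comes before I2 in BB; the positions seen so far are now cached,
+//     // so later queries on this block are cheap until BB is modified.
+//   }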
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Instruction.h"
+using namespace llvm;
+
+OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB)
+    : NextInstPos(0), BB(BasicB) {
+  LastInstFound = BB->end();
+}
+
+/// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+/// Cache and number the instructions while walking \p BB.
+bool OrderedBasicBlock::comesBefore(const Instruction *A,
+                                    const Instruction *B) {
+  const Instruction *Inst = nullptr;
+  assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
+         "Instruction supposed to be in NumberedInsts");
+
+  // Start the search with the instruction found in the last lookup round.
+  auto II = BB->begin();
+  auto IE = BB->end();
+  if (LastInstFound != IE)
+    II = std::next(LastInstFound);
+
+  // Number all instructions up to the point where we find 'A' or 'B'.
+  for (; II != IE; ++II) {
+    Inst = cast<Instruction>(II);
+    NumberedInsts[Inst] = NextInstPos++;
+    if (Inst == A || Inst == B)
+      break;
+  }
+
+  assert(II != IE && "Instruction not found?");
+  assert((Inst == A || Inst == B) && "Should find A or B");
+  LastInstFound = II;
+  return Inst == A;
+}
+
+/// \brief Find out whether \p A dominates \p B, meaning whether \p A
+/// comes before \p B in \p BB. This is a simplification that considers
+/// cached instruction positions and ignores other basic blocks, being
+/// only relevant for comparing relative instruction positions inside \p BB.
+bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
+  assert(A->getParent() == B->getParent() &&
+         "Instructions must be in the same basic block!");
+
+  // First we look up the instructions; a failed lookup gives us back end().
+  // If both numbers exist, we simply compare them. Otherwise, if NA exists
+  // and NB doesn't, A must come before B: the walk numbers instructions from
+  // the block start without gaps, so B would already have been numbered if it
+  // preceded A. Symmetrically, if NB exists but NA does not, A must come
+  // after B. If neither exists, we need to number the block and cache the
+  // results (by calling comesBefore).
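+  // For example, if earlier walks numbered {I0:0, I1:1, I2:2} and we are
+  // asked about (I1, I5) with I5 not yet numbered, we can answer right away:
+  // I1 comes first. (I0..I5 are hypothetical names, for illustration only.)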
+ auto NAI = NumberedInsts.find(A); + auto NBI = NumberedInsts.find(B); + if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end()) + return NAI->second < NBI->second; + if (NAI != NumberedInsts.end()) + return true; + if (NBI != NumberedInsts.end()) + return false; + + return comesBefore(A, B); +} diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 8cd8534..f59d267 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -21,6 +21,9 @@ #include <algorithm> #include <iterator> #include <set> +#ifndef NDEBUG +#include "llvm/Analysis/RegionPrinter.h" +#endif using namespace llvm; @@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_, calculate(F); } +#ifndef NDEBUG +void RegionInfo::view() { viewRegion(this); } + +void RegionInfo::viewOnly() { viewRegionOnly(this); } +#endif + //===----------------------------------------------------------------------===// // RegionInfoPass implementation // diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp index d7f5109..acb218d 100644 --- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include "llvm/IR/LegacyPassManager.h" +#endif using namespace llvm; @@ -55,25 +58,22 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { } }; -template<> -struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { +template <> +struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> { DOTGraphTraits (bool isSimple = false) : DOTGraphTraits<RegionNode*>(isSimple) {} - static std::string getGraphName(RegionInfoPass *DT) { - return "Region Graph"; - } + static std::string getGraphName(const RegionInfo *) { return "Region Graph"; } - std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); - return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, - reinterpret_cast<RegionNode*>(RI.getTopLevelRegion())); + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits<RegionNode *>::getNodeLabel( + Node, reinterpret_cast<RegionNode *>(G->getTopLevelRegion())); } std::string getEdgeAttributes(RegionNode *srcNode, - GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); + GraphTraits<RegionInfo *>::ChildIteratorType CI, + RegionInfo *G) { RegionNode *destNode = *CI; if (srcNode->isSubRegion() || destNode->isSubRegion()) @@ -83,7 +83,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>(); BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); - Region *R = RI.getRegionFor(destBB); + Region *R = G->getRegionFor(destBB); while (R && R->getParent()) if (R->getParent()->getEntry() == destBB) @@ -91,7 +91,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { else break; - if (R->getEntry() == destBB && R->contains(srcBB)) + if (R && R->getEntry() == destBB && R->contains(srcBB)) return "constraint=false"; return ""; @@ -99,8 +99,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { // Print the cluster of the subregions. 
This groups the single basic blocks // and adds a different background color for each group. - static void printRegionCluster(const Region &R, - GraphWriter<RegionInfoPass*> &GW, + static void printRegionCluster(const Region &R, GraphWriter<RegionInfo *> &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R) @@ -132,50 +131,81 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { O.indent(2 * depth) << "}\n"; } - static void addCustomGraphFeatures(const RegionInfoPass* RIP, - GraphWriter<RegionInfoPass*> &GW) { - const RegionInfo &RI = RIP->getRegionInfo(); + static void addCustomGraphFeatures(const RegionInfo *G, + GraphWriter<RegionInfo *> &GW) { raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*RI.getTopLevelRegion(), GW, 4); + printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; } //end namespace llvm namespace { +struct RegionInfoPassGraphTraits { + static RegionInfo *getGraph(RegionInfoPass *RIP) { + return &RIP->getRegionInfo(); + } +}; + +struct RegionPrinter + : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *, + RegionInfoPassGraphTraits> { + static char ID; + RegionPrinter() + : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *, + RegionInfoPassGraphTraits>("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *, + RegionInfoPassGraphTraits> { + static char ID; + RegionOnlyPrinter() + : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *, + RegionInfoPassGraphTraits>("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyPrinter::ID = 0; + struct RegionViewer - : public DOTGraphTraitsViewer<RegionInfoPass, false> { + : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *, + RegionInfoPassGraphTraits> { static char ID; - RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){ + RegionViewer() + : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *, + RegionInfoPassGraphTraits>("reg", ID) { initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer<RegionInfoPass, true> { + : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *, + RegionInfoPassGraphTraits> { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) { + RegionOnlyViewer() + : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *, + RegionInfoPassGraphTraits>("regonly", ID) { initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyViewer::ID = 0; -struct RegionPrinter - : public DOTGraphTraitsPrinter<RegionInfoPass, false> { - static char ID; - RegionPrinter() : - DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) { - initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); - } -}; -char RegionPrinter::ID = 0; } //end anonymous namespace INITIALIZE_PASS(RegionPrinter, "dot-regions", "Print regions of function to 'dot' file", true, true) +INITIALIZE_PASS( + RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file (with no function bodies)", true, + true) + INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", true, true) @@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, 
"view-regions-only", "View regions of function (with no function bodies)", true, true) -namespace { - -struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter<RegionInfoPass, true> { - static char ID; - RegionOnlyPrinter() : - DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) { - initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); - } -}; +FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); } +FunctionPass *llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); } -char RegionOnlyPrinter::ID = 0; -INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", - "Print regions of function to 'dot' file " - "(with no function bodies)", - true, true) - FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } @@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() { return new RegionOnlyViewer(); } -FunctionPass* llvm::createRegionPrinterPass() { - return new RegionPrinter(); +#ifndef NDEBUG +static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { + assert(RI && "Argument must be non-null"); + + llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); + std::string GraphName = DOTGraphTraits<RegionInfo *>::getGraphName(RI); + + llvm::ViewGraph(RI, "reg", ShortNames, + Twine(GraphName) + " for '" + F->getName() + "' function"); } -FunctionPass* llvm::createRegionOnlyPrinterPass() { - return new RegionOnlyPrinter(); +static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { + assert(F && "Argument must be non-null"); + assert(!F->isDeclaration() && "Function must have an implementation"); + + // The viewer and analysis passes do not modify anything, so we can safely + // remove the const qualifier + auto NonConstF = const_cast<Function *>(F); + + llvm::legacy::FunctionPassManager FPM(NonConstF->getParent()); + FPM.add(ViewerPass); + FPM.doInitialization(); + FPM.run(*NonConstF); + FPM.doFinalization(); } +void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } + +void llvm::viewRegion(const Function *F) { + invokeFunctionPass(F, createRegionViewerPass()); +} + +void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } + +void llvm::viewRegionOnly(const Function *F) { + invokeFunctionPass(F, createRegionOnlyViewerPass()); +} +#endif diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 9c7c175..34074ef 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -83,11 +83,13 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SaveAndRestore.h" #include <algorithm> using namespace llvm; @@ -114,16 +116,6 @@ static cl::opt<bool> VerifySCEV("verify-scev", cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -char 
ScalarEvolution::ID = 0; - //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } -#endif void SCEV::print(raw_ostream &OS) const { switch (static_cast<SCEVTypes>(getSCEVType())) { @@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const { if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); + return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : @@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { //===----------------------------------------------------------------------===// namespace { - /// SCEVComplexityCompare - Return true if the complexity of the LHS is less - /// than the complexity of the RHS. This comparator is used to canonicalize - /// expressions. - class SCEVComplexityCompare { - const LoopInfo *const LI; - public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; - } - - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; - - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; - - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast<SCEVTypes>(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); - const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); - - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. - const Value *LV = LU->getValue(), *RV = RU->getValue(); - - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. - bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; - - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; - - // Sort arguments by their position. - if (const Argument *LA = dyn_cast<Argument>(LV)) { - const Argument *RA = cast<Argument>(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } - - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { - const Instruction *RInst = cast<Instruction>(RV); - - // Compare loop depths. 
- const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } - - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS. This comparator is used to canonicalize +/// expressions. +class SCEVComplexityCompare { + const LoopInfo *const LI; +public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - return 0; - } + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } - case scConstant: { - const SCEVConstant *LC = cast<SCEVConstant>(LHS); - const SCEVConstant *RC = cast<SCEVConstant>(RHS); - - // Compare constant values. - const APInt &LA = LC->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast<SCEVTypes>(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); + const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast<Argument>(LV)) { + const Argument *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); - const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); - - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. 
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { + const Instruction *RInst = cast<Instruction>(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } - // Lexicographically compare. - for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } + return 0; + } - return 0; + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + + // Compare constant values. + const APInt &LA = LC->getAPInt(); + const APInt &RA = RC->getAPInt(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; } - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); - const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); - - // Lexicographically compare n-ary expressions. - unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); - const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); + const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); - // Lexicographically compare udiv expressions. - long X = compare(LC->getLHS(), RC->getLHS()); + // Lexicographically compare n-ary expressions. 
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } + return (int)LNumOps - (int)RNumOps; + } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); - const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); + const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); + const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); } - }; -} + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); + } +}; +} // end anonymous namespace /// GroupByComplexity - Given a list of SCEV objects, order them by their /// complexity, and group objects of the same complexity together by value. @@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, } } -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; + } + }; + FindSCEVSize F; SCEVTraversal<FindSCEVSize> ST(F); ST.visitAll(S); @@ -771,8 +760,8 @@ public: void visitConstant(const SCEVConstant *Numerator) { if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { - APInt NumeratorVal = Numerator->getValue()->getValue(); - APInt DenominatorVal = D->getValue()->getValue(); + APInt NumeratorVal = Numerator->getAPInt(); + APInt DenominatorVal = D->getAPInt(); uint32_t NumeratorBW = NumeratorVal.getBitWidth(); uint32_t DenominatorBW = DenominatorVal.getBitWidth(); @@ -792,17 +781,15 @@ public: void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; - assert(Numerator->isAffine() && "Numerator should be affine"); + if (!Numerator->isAffine()) + return cannotDivide(Numerator); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); // Bail out if the types do not match. 
Type *Ty = Denominator->getType(); if (Ty != StartQ->getType() || Ty != StartR->getType() || - Ty != StepQ->getType() || Ty != StepR->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + Ty != StepQ->getType() || Ty != StepR->getType()) + return cannotDivide(Numerator); Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), @@ -818,11 +805,8 @@ public: divide(SE, Op, Denominator, &Q, &R); // Bail out if types do not match. - if (Ty != Q->getType() || Ty != R->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType() || Ty != R->getType()) + return cannotDivide(Numerator); Qs.push_back(Q); Rs.push_back(R); @@ -845,11 +829,8 @@ public: bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { // Bail out if types do not match. - if (Ty != Op->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Op->getType()) + return cannotDivide(Numerator); if (FoundDenominatorTerm) { Qs.push_back(Op); @@ -865,11 +846,8 @@ public: } // Bail out if types do not match. - if (Ty != Q->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType()) + return cannotDivide(Numerator); FoundDenominatorTerm = true; Qs.push_back(Q); @@ -884,11 +862,8 @@ public: return; } - if (!isa<SCEVUnknown>(Denominator)) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (!isa<SCEVUnknown>(Denominator)) + return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. ValueToValueMap RewriteMap; @@ -908,15 +883,12 @@ public: // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); - if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { - // This SCEV does not seem to simplify: fail the division here. - Quotient = Zero; - Remainder = Numerator; - return; - } + // This SCEV does not seem to simplify: fail the division here. + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) + return cannotDivide(Numerator); divide(SE, Diff, Denominator, &Q, &R); - assert(R == Zero && - "(Numerator - Remainder) should evenly divide Denominator"); + if (R != Zero) + return cannotDivide(Numerator); Quotient = Q; } @@ -924,11 +896,18 @@ private: SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) : SE(S), Denominator(Denominator) { - Zero = SE.getConstant(Denominator->getType(), 0); - One = SE.getConstant(Denominator->getType(), 1); + Zero = SE.getZero(Denominator->getType()); + One = SE.getOne(Denominator->getType()); + + // We generally do not know how to divide Expr by Denominator. We + // initialize the division to a "cannot divide" state to simplify the rest + // of the code. + cannotDivide(Numerator); + } - // By default, we don't know how to divide Expr by Denominator. - // Providing the default here simplifies the rest of the code. + // Convenience function for giving up on the division. We set the quotient to + // be equal to zero and the remainder to be equal to the numerator. + void cannotDivide(const SCEV *Numerator) { Quotient = Zero; Remainder = Numerator; } @@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // If the input value is a chrec scev, truncate the chrec's operands. 
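  // For instance, trunc({X,+,Y}) becomes {trunc(X),+,trunc(Y)}: the cast is
  // folded into each operand of the recurrence.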
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + for (const SCEV *Op : AddRec->operands()) + Operands.push_back(getTruncateExpr(Op, Ty)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } @@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // `Step`: // 1. NSW/NUW flags on the step increment. - const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); + auto PreStartFlags = + ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); + const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); if (OverflowLimit && - SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) return PreStart; - } + return nullptr; } @@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, if (!StartC) return false; - APInt StartAI = StartC->getValue()->getValue(); + APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddPointer(PreStart); + ID.AddPointer(Step); + ID.AddPointer(L); + void *IP = nullptr; + const auto *PreAR = + static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. - const SCEVAddRecExpr *PreAR = [&]() { - FoldingSetNodeID ID; - ID.AddInteger(scAddRecExpr); - ID.AddPointer(PreStart); - ID.AddPointer(Step); - ID.AddPointer(L); - void *IP = nullptr; - return static_cast<SCEVAddRecExpr *>( - this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - }(); - if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) const SCEV *DeltaS = getConstant(StartC->getType(), Delta); ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; @@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } } + if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { + // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> + if (SA->getNoWrapFlags(SCEV::FlagNUW)) { + // If the addition does not unsign overflow then we can, by definition, + // commute the zero extension with the addition operation. + SmallVector<const SCEV *, 4> Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getZeroExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNUW); + } + } + // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; @@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 - if (auto SA = dyn_cast<SCEVAddExpr>(Op)) { + if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { if (SA->getNumOperands() == 2) { - auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); - auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); + auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); + auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); if (SMul && SC1) { - if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty), @@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } } } + + // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> + if (SA->getNoWrapFlags(SCEV::FlagNSW)) { + // If the addition does not sign overflow then we can, by definition, + // commute the sign extension with the addition operation. + SmallVector<const SCEV *, 4> Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getSignExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNSW); + } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the @@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If Start and Step are constants, check if we can apply this // transformation: // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 - auto SC1 = dyn_cast<SCEVConstant>(Start); - auto SC2 = dyn_cast<SCEVConstant>(Step); + auto *SC1 = dyn_cast<SCEVConstant>(Start); + auto *SC2 = dyn_cast<SCEVConstant>(Step); if (SC1 && SC2) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty); - const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, - L, AR->getNoWrapFlags()); + const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, + AR->getNoWrapFlags()); return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); } } @@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) - if (SC->getValue()->getValue().isNegative()) + if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. @@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, // Pull a buried constant out to the outside. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; - AccumulatedConstant += Scale * C->getValue()->getValue(); + AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. 
We're especially interested in multiplies
@@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
       APInt NewScale =
-        Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+          Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
         // A multiplication of a constant with another add; recurse.
         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
@@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
         // the map.
         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
         const SCEV *Key = SE.getMulExpr(MulOps);
-        std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
-          M.insert(std::make_pair(Key, NewScale));
+        auto Pair = M.insert(std::make_pair(Key, NewScale));
         if (Pair.second) {
           NewOps.push_back(Pair.first->first);
         } else {
@@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
   return Interesting;
 }
 
-namespace {
-  struct APIntCompare {
-    bool operator()(const APInt &LHS, const APInt &RHS) const {
-      return LHS.ult(RHS);
-    }
-  };
-}
-
 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
 // `Flags' as can't-wrap behavior. Infer a more aggressive set of
 // can't-overflow flags for the operation if possible.
 static SCEV::NoWrapFlags
 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
                       const SmallVectorImpl<const SCEV *> &Ops,
-                      SCEV::NoWrapFlags OldFlags) {
+                      SCEV::NoWrapFlags Flags) {
   using namespace std::placeholders;
+  typedef OverflowingBinaryOperator OBO;
 
   bool CanAnalyze =
       Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
@@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   SCEV::NoWrapFlags SignOrUnsignWrap =
-      ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
+      ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
 
   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
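+  // (Why this is sound: non-negative w-bit values are at most 2^(w-1)-1, so a
+  // sum of them that does not overflow signed stays below 2^(w-1) and hence
+  // cannot wrap unsigned either.)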
-  auto IsKnownNonNegative =
-    std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
+  auto IsKnownNonNegative = [&](const SCEV *S) {
+    return SE->isKnownNonNegative(S);
+  };
+
+  if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
+    Flags =
+        ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
 
-  if (SignOrUnsignWrap == SCEV::FlagNSW &&
-      std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
-    return ScalarEvolution::setFlags(OldFlags,
-                                     (SCEV::NoWrapFlags)SignOrUnsignMask);
+  SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
+
+  if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
+      Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
+
+    // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
+    // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
+
+    const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
+    if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
+      auto NSWRegion =
+          ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap);
+      if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
+        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+    }
+    if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
+      auto NUWRegion =
+          ConstantRange::makeNoWrapRegion(Instruction::Add, C,
+                                          OBO::NoUnsignedWrap);
+      if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
+        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+    }
+  }
 
-  return OldFlags;
+  return Flags;
 }
 
 /// getAddExpr - Get a canonical add expression, or something simpler if
@@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
          "SCEVAddExpr operand types don't match!");
 #endif
 
-  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
-
   // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, LI);
+  GroupByComplexity(Ops, &LI);
+
+  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
 
   // If there are any constants, fold them together.
   unsigned Idx = 0;
@@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     assert(Idx < Ops.size());
     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
       // We found two constants, fold them together!
-      Ops[0] = getConstant(LHSC->getValue()->getValue() +
-                           RHSC->getValue()->getValue());
+      Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
       if (Ops.size() == 2) return Ops[0];
       Ops.erase(Ops.begin()+1);  // Erase the folded element
       LHSC = cast<SCEVConstant>(Ops[0]);
@@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
           break;
         }
         LargeMulOps.push_back(T->getOperand());
-      } else if (const SCEVConstant *C =
-                   dyn_cast<SCEVConstant>(M->getOperand(j))) {
+      } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
         LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
       } else {
         Ok = false;
@@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                    Ops.data(), Ops.size(),
                                    APInt(BitWidth, 1), *this)) {
+    struct APIntCompare {
+      bool operator()(const APInt &LHS, const APInt &RHS) const {
+        return LHS.ult(RHS);
+      }
+    };
+
     // Some interesting folding opportunity is present, so it's worthwhile to
     // re-generate the operands list. Group the operands by constant scale,
     // to avoid multiplying by the same constant scale multiple times.
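+    // For example, 2*a + 2*b + 3*c is re-emitted as 2*(a + b) + 3*c, with a
+    // single multiply per distinct scale.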
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; - for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(), - E = NewOps.end(); I != E; ++I) - MulOpLists[M.find(*I)->second].push_back(*I); + for (const SCEV *NewOp : NewOps) + MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); - for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator - I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) - if (I->first != 0) - Ops.push_back(getMulExpr(getConstant(I->first), - getAddExpr(I->second))); + for (auto &MulOp : MulOpLists) + if (MulOp.first != 0) + Ops.push_back(getMulExpr(getConstant(MulOp.first), + getAddExpr(MulOp.second))); if (Ops.empty()) - return getConstant(Ty, 0); + return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops); @@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getConstant(Ty, 1); + const SCEV *One = getOne(Ty); const SCEV *AddOne = getAddExpr(One, InnerMul); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); if (Ops.size() == 2) return OuterMul; @@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, AddRec->op_end()); for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); ++OtherIdx) - if (const SCEVAddRecExpr *OtherAddRec = - dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) if (OtherAddRec->getLoop() == AddRecLoop) { for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { @@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, "SCEVMulExpr operand types don't match!"); #endif - Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); + + Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; @@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, ++Idx; while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! 
- ConstantInt *Fold = ConstantInt::get(getContext(), - LHSC->getValue()->getValue() * - RHSC->getValue()->getValue()); + ConstantInt *Fold = + ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { SmallVector<const SCEV *, 4> NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), - E = Add->op_end(); I != E; ++I) { - const SCEV *Mul = getMulExpr(Ops[0], *I); + for (const SCEV *AddOp : Add->operands()) { + const SCEV *Mul = getMulExpr(Ops[0], AddOp); if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps); - } - else if (const SCEVAddRecExpr * - AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { + } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector<const SCEV *, 4> Operands; - for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), - E = AddRec->op_end(); I != E; ++I) { - Operands.push_back(getMulExpr(Ops[0], *I)); - } + for (const SCEV *AddRecOp : AddRec->operands()) + Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } @@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV*, 7> AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); + const SCEV *Term = getZero(Ty); for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), @@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); - unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned LZ = RHSC->getAPInt().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. - if (!RHSC->getValue()->getValue().isPowerOf2()) + if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); @@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. 
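+      // ("Safe" is established below: the step must be evenly divisible by
+      // the divisor, and zero-extending the recurrence to a wider type must
+      // commute with forming it, i.e. the recurrence must not wrap.)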
- const APInt &StepInt = Step->getValue()->getValue(); - const APInt &DivInt = RHSC->getValue()->getValue(); + const APInt &StepInt = Step->getAPInt(); + const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) - Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop(), - SCEV::FlagNW); + for (const SCEV *Op : AR->operands()) + Operands.push_back(getUDivExpr(Op, RHS)); + return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. @@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { - const APInt &StartInt = StartC->getValue()->getValue(); + const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, @@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + for (const SCEV *Op : M->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { @@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + for (const SCEV *Op : A->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { @@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue().abs(); - APInt B = C2->getValue()->getValue().abs(); + APInt A = C1->getAPInt().abs(); + APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); @@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. - if (const SCEVConstant *LHSCst = - dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector<const SCEV *, 2> Operands; Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // check. 
APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { - LHSCst = cast<SCEVConstant>( - getConstant(LHSCst->getValue()->getValue().udiv(Factor))); - RHSCst = cast<SCEVConstant>( - getConstant(RHSCst->getValue()->getValue().udiv(Factor))); + LHSCst = + cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor))); + RHSCst = + cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector<const SCEV *, 2> Operands; Operands.push_back(LHSCst); Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, // Canonicalize nested AddRecs in by nesting them in order of loop depth. if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { const Loop *NestedLoop = NestedAR->getLoop(); - if (L->contains(NestedLoop) ? - (L->getLoopDepth() < NestedLoop->getLoopDepth()) : - (!NestedLoop->contains(L) && - DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + if (L->contains(NestedLoop) + ? (L->getLoopDepth() < NestedLoop->getLoopDepth()) + : (!NestedLoop->contains(L) && + DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this // requirement. - bool AllInvariant = true; - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - if (!isLoopInvariant(Operands[i], L)) { - AllInvariant = false; - break; - } + bool AllInvariant = all_of( + Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); + if (AllInvariant) { // Create a recurrence for the outer loop with the same step size. // @@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); - AllInvariant = true; - for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) - if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { - AllInvariant = false; - break; - } + AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { + return isLoopInvariant(Op, NestedLoop); + }); + if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. // @@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any - // context. + // context. This can be fixed similarly to how these flags are handled for + // adds. SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + const SCEV *TotalOffset = getZero(IntPtrTy); // The address space is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = PointerType::getUnqual(PointeeType); @@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. 
unsigned Idx = 0; @@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::smax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { #endif // Sort by complexity; this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::umax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - return getConstant(IntTy, - F->getParent()->getDataLayout().getTypeAllocSize(AllocTy)); + return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, @@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant( - IntTy, - F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset( - FieldNo)); + IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { @@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { /// for which isSCEVable must return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - return F->getParent()->getDataLayout().getTypeSizeInBits(Ty); + return getDataLayout().getTypeSizeInBits(Ty); } /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isIntegerTy()) { + if (Ty->isIntegerTy()) return Ty; - } // The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - return F->getParent()->getDataLayout().getIntPtrType(Ty); + return getDataLayout().getIntPtrType(Ty); } const SCEV *ScalarEvolution::getCouldNotCompute() { - return &CouldNotCompute; + return CouldNotCompute.get(); } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const { // Helper class working with SCEVTraversal to figure out if a SCEV contains // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne // is set iff we find such a SCEVUnknown. @@ -3300,9 +3302,7 @@ namespace { } bool isDone() const { return FindOne; } }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const { FindInvalidSCEVUnknown F; SCEVTraversal<FindInvalidSCEVUnknown> ST(F); ST.visitAll(S); @@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + const SCEV *S = getExistingSCEV(V); + if (S == nullptr) { + S = createSCEV(V); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + } + return S; +} + +const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; - else - ValueExprMap.erase(I); + ValueExprMap.erase(I); } - const SCEV *S = createSCEV(V); - - // The process of creating a SCEV for V may have caused other SCEVs - // to have been created, so it's necessary to insert the new entry - // from scratch, rather than trying to remember the insert position - // above. - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); - return S; + return nullptr; } /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// -const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) return getConstant( cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMulExpr(V, - getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); + return getMulExpr( + V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags); } /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V @@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags) { - assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); - // Fast path: X - X --> 0. if (LHS == RHS) - return getConstant(LHS->getType(), 0); + return getZero(LHS->getType()); + + // We represent LHS - RHS as LHS + (-1)*RHS. This transformation + // makes it so that we cannot make much use of NUW. + auto AddFlags = SCEV::FlagAnyWrap; + const bool RHSIsNotMinSigned = + !getSignedRange(RHS).getSignedMin().isMinSignedValue(); + if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { + // Let M be the minimum representable signed value. Then (-1)*RHS + // signed-wraps if and only if RHS is M. That can happen even for + // a NSW subtraction because e.g. (-1)*M signed-wraps even though + // -1 - M does not.
So to transfer NSW from LHS - RHS to LHS + + // (-1)*RHS, we need to prove that RHS != M. + // + // If LHS is non-negative and we know that LHS - RHS does not + // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap + // either by proving that RHS > M or that LHS >= 0. + if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { + AddFlags = SCEV::FlagNSW; + } + } + + // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - + // RHS is NSW and LHS >= 0. + // + // The difficulty here is that the NSW flag may have been proven + // relative to a loop that is to be found in a recurrence in LHS and + // not in RHS. Applying NSW to (-1)*M may then let the NSW have a + // larger scope than intended. + auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - // X - Y --> X + -Y. - // X -(nsw || nuw) Y --> X + -Y. - return getAddExpr(LHS, getNegativeSCEV(RHS)); + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { return getPointerBase(Cast->getOperand()); - } - else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { + } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { const SCEV *PtrOp = nullptr; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - if ((*I)->getType()->isPointerTy()) { + for (const SCEV *NAryOp : NAry->operands()) { + if (NAryOp->getType()->isPointerTy()) { // Cannot find the base of an expression with multiple pointer operands. if (PtrOp) return V; - PtrOp = *I; + PtrOp = NAryOp; } } if (!PtrOp) @@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I).second) continue; - ValueExprMapType::iterator It = - ValueExprMap.find_as(static_cast<Value *>(I)); + auto It = ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { } } -/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in -/// a loop header, making it a potential recurrence, or it doesn't. -/// -const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { - if (const Loop *L = LI->getLoopFor(PN->getParent())) - if (L->getHeader() == PN->getParent()) { - // The loop may have multiple entrances or multiple exits; we can analyze - // this phi as an addrec if it has a unique entry value and a unique - // backedge value. - Value *BEValueV = nullptr, *StartValueV = nullptr; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (L->contains(PN->getIncomingBlock(i))) { - if (!BEValueV) { - BEValueV = V; - } else if (BEValueV != V) { - BEValueV = nullptr; - break; - } - } else if (!StartValueV) { - StartValueV = V; - } else if (StartValueV != V) { - StartValueV = nullptr; - break; - } - } - if (BEValueV && StartValueV) { - // While we are analyzing this PHI node, handle its value symbolically. - const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && - "PHI node already processed?"); - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); - - // Using this symbolic name for the PHI, analyze the value coming around - // the back-edge. 
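The INT_MIN hazard described in the comments above is easy to reproduce at 8 bits: negating the minimum signed value wraps, while the corresponding subtraction stays in range. A minimal sketch, assuming an i8-like width; the narrowing cast models two's-complement wrapping:

#include <cassert>
#include <cstdint>

int main() {
  const int M = INT8_MIN; // -128, the minimum representable i8 value
  // (-1)*M truncated back to 8 bits wraps around to -128 ...
  assert(static_cast<int8_t>(-1 * M) == INT8_MIN);
  // ... even though the NSW subtraction -1 - M = 127 does not wrap.
  assert(-1 - M == INT8_MAX);
  return 0;
}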
- const SCEV *BEValue = getSCEV(BEValueV); - - // NOTE: If BEValue is loop invariant, we know that the PHI node just - // has a special value for the first iteration of the loop. - - // If the value coming around the backedge is an add with the symbolic - // value we just inserted, then we found a simple induction variable! - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { - // If there is a single occurrence of the symbolic value, replace it - // with a recurrence. - unsigned FoundIndex = Add->getNumOperands(); - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (Add->getOperand(i) == SymbolicName) - if (FoundIndex == e) { - FoundIndex = i; - break; - } +namespace { +class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVInitRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); + } - if (FoundIndex != Add->getNumOperands()) { - // Create an add with everything but the specified operand. - SmallVector<const SCEV *, 8> Ops; - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); - const SCEV *Accum = getAddExpr(Ops); - - // This is not a valid addrec if the step amount is varying each - // loop iteration, but is not itself an addrec in this loop. - if (isLoopInvariant(Accum, L) || - (isa<SCEVAddRecExpr>(Accum) && - cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - - // If the increment doesn't overflow, then neither the addrec nor - // the post-increment will overflow. - if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { - if (OBO->getOperand(0) == PN) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); - } - } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { - // If the increment is an inbounds GEP, then we know the address - // space cannot be wrapped around. We cannot make any guarantee - // about signed or unsigned overflow because pointers are - // unsigned but we may have a negative index from the base - // pointer. We can guarantee that no unsigned wrap occurs if the - // indices form a positive value. - if (GEP->isInBounds() && GEP->getOperand(0) == PN) { - Flags = setFlags(Flags, SCEV::FlagNW); - - const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); - if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) - Flags = setFlags(Flags, SCEV::FlagNUW); - } + SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} - // We cannot transfer nuw and nsw flags from subtraction - // operations -- sub nuw X, Y is not the same as add nuw X, -Y - // for instance. - } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } - const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - - // Since the no-wrap flags are on the increment, they apply to the - // post-incremented value as well. - if (isLoopInvariant(Accum, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), - Accum, L, Flags); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. 
We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + // Only allow AddRecExprs for this loop. + if (Expr->getLoop() == L) + return Expr->getStart(); + Valid = false; + return Expr; + } + + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; + +class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVShiftRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); + } + + SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + // Only allow AddRecExprs for this loop. + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (Expr->getLoop() == L && Expr->isAffine()) + return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); + Valid = false; + return Expr; + } + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return nullptr; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; } - } else if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(BEValue)) { - // Otherwise, this could be a loop like this: - // i = 0; for (j = 1; ..; ++j) { .... 
i = j; } - // In this case, j = {1,+,1} and BEValue is j. - // Because the other in-value of i (0) fits the evolution of BEValue - // i really is an addrec evolution. - if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV *StartVal = getSCEV(StartValueV); - - // If StartVal = j.start - j.stride, we can use StartVal as the - // initial step of the addrec evolution. - if (StartVal == getMinusSCEV(AddRec->getOperand(0), - AddRec->getOperand(1))) { - // FIXME: For constant StartVal, we should be able to infer - // no-wrap flags. - const SCEV *PHISCEV = - getAddRecExpr(StartVal, AddRec->getOperand(1), L, - SCEV::FlagAnyWrap); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector<const SCEV *, 8> Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa<SCEVAddRecExpr>(Accum) && + cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. + if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { + if (OBO->getOperand(0) == PN) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); } + + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } else { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. 
+ // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. + // + // We can generalize this by saying that i is the shifted value of BEValue + // by one iteration: + // PHI(f(0), f({1,+,1})) --> f({0,+,1}) + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + if (Shifted != getCouldNotCompute() && + Start != getCouldNotCompute()) { + const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + return Shifted; } } } + } + + return nullptr; +} + +// Checks if the SCEV S is available at BB. S is considered available at BB +// if S can be materialized at BB without introducing a fault. +static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, + BasicBlock *BB) { + struct CheckAvailable { + bool TraversalDone = false; + bool Available = true; + + const Loop *L = nullptr; // The loop BB is in (can be nullptr) + BasicBlock *BB = nullptr; + DominatorTree &DT; + + CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT) + : L(L), BB(BB), DT(DT) {} + + bool setUnavailable() { + TraversalDone = true; + Available = false; + return false; + } + + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: + case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + // These expressions are available if their operand(s) is/are. + return true; + + case scAddRecExpr: { + // We allow add recurrences that are on the loop BB is in, or some + // outer loop. This guarantees availability because the value of the + // add recurrence at BB is simply the "current" value of the induction + // variable. We can relax this in the future; for instance an add + // recurrence on a sibling dominating loop is also available at BB. + const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop(); + if (L && (ARLoop == L || ARLoop->contains(L))) + return true; + + return setUnavailable(); + } + + case scUnknown: { + // For SCEVUnknown, we check for simple dominance. + const auto *SU = cast<SCEVUnknown>(S); + Value *V = SU->getValue(); + + if (isa<Argument>(V)) + return false; + + if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB)) + return false; + + return setUnavailable(); + } + + case scUDivExpr: + case scCouldNotCompute: + // We do not try to be smart about these at all. + return setUnavailable(); + } + llvm_unreachable("switch should be fully covered!"); + } + + bool isDone() { return TraversalDone; } + }; + + CheckAvailable CA(L, BB, DT); + SCEVTraversal<CheckAvailable> ST(CA); + + ST.visitAll(S); + return CA.Available; +} + +// Try to match a control flow sequence that branches out at BI and merges back +// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful +// match.
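For orientation, the br-plus-phi shape the matcher below looks for is what an ordinary two-armed conditional assignment becomes once the value is promoted to a PHI; this source-level equivalent is a hypothetical illustration, not code from the patch:

int merge(bool C, int X, int Y) {
  int V;
  if (C)        // br %cond, label %left, label %right
    V = X;      // left: feeds the first operand of the merge PHI
  else
    V = Y;      // right: feeds the second operand of the merge PHI
  return V;     // merge: V = phi [ %x, %left ], [ %y, %right ],
                // analyzed as select %cond, %x, %y
}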
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, + Value *&C, Value *&LHS, Value *&RHS) { + C = BI->getCondition(); + + BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); + BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); + + if (!LeftEdge.isSingleEdge()) + return false; + + assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); + + Use &LeftUse = Merge->getOperandUse(0); + Use &RightUse = Merge->getOperandUse(1); + + if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { + LHS = LeftUse; + RHS = RightUse; + return true; + } + + if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { + LHS = RightUse; + RHS = LeftUse; + return true; + } + + return false; +} + +const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { + if (PN->getNumIncomingValues() == 2) { + const Loop *L = LI.getLoopFor(PN->getParent()); + + // We don't want to break LCSSA, even in a SCEV expression tree. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) + return nullptr; + + // Try to match + // + // br %cond, label %left, label %right + // left: + // br label %merge + // right: + // br label %merge + // merge: + // V = phi [ %x, %left ], [ %y, %right ] + // + // as "select %cond, %x, %y" + + BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); + assert(IDom && "At least the entry block should dominate PN"); + + auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); + Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; + + if (BI && BI->isConditional() && + BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && + IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) && + IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())) + return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); + } + + return nullptr; +} + +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const SCEV *S = createAddRecFromPHI(PN)) + return S; + + if (const SCEV *S = createNodeFromSelectLikePHI(PN)) + return S; // If the PHI has a single incoming value, follow that value, unless the // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = - SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC)) - if (LI->replacementPreservesLCSSAForm(PN, V)) + if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) + if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } +const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, + Value *Cond, + Value *TrueVal, + Value *FalseVal) { + // Handle "constant" branch or select. This can occur for instance when a + // loop pass transforms an inner loop and moves on to process the outer loop. + if (auto *CI = dyn_cast<ConstantInt>(Cond)) + return getSCEV(CI->isOne() ? TrueVal : FalseVal); + + // Try to match some simple smax or umax patterns. + auto *ICI = dyn_cast<ICmpInst>(Cond); + if (!ICI) + return getUnknown(I); + + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? 
a+x : b+x -> smax(a, b)+x + // a >s b ? b+x : a+x -> smin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + + return getUnknown(I); +} + /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// @@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return C->getValue()->getValue().countTrailingZeros(); + return C->getAPInt().countTrailingZeros(); if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) return std::min(GetMinTrailingZeros(T->getOperand()), @@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. 
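In source terms, the signed-max rewrite handled above fires when both select arms add the same offset to the compared values. A hypothetical example, with the SCEV the logic above would produce noted in the comment:

// "a >s b ? a+x : b+x --> smax(a, b)+x" as it typically appears in C++.
int maxPlusOffset(int A, int B, int X) {
  // Both arms add the same X, so the select folds to a signed max:
  // SCEV of the returned value: (X + smax(A, B))
  return A > B ? A + X : B + X;
}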
unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, + nullptr, &DT); return Zeros.countTrailingOnes(); } @@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { /// GetRangeFromMetadata - Helper method to assign a range to V from /// metadata present in the IR. static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { - if (Instruction *I = dyn_cast<Instruction>(V)) { - if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) { - ConstantRange TotalRange( - cast<IntegerType>(I->getType())->getBitWidth(), false); - - unsigned NumRanges = MD->getNumOperands() / 2; - assert(NumRanges >= 1); - - for (unsigned i = 0; i < NumRanges; ++i) { - ConstantInt *Lower = - mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0)); - ConstantInt *Upper = - mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1)); - ConstantRange Range(Lower->getValue(), Upper->getValue()); - TotalRange = TotalRange.unionWith(Range); - } - - return TotalRange; - } - } + if (Instruction *I = dyn_cast<Instruction>(V)) + if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) + return getConstantRangeFromMetadata(*MD); return None; } @@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S, return I->second; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return setRange(C, SignHint, ConstantRange(C->getValue()->getValue())); + return setRange(C, SignHint, ConstantRange(C->getAPInt())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S, if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) if (!C->getValue()->isZero()) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(C->getAPInt(), APInt(BitWidth, 0))); // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. @@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S, // Split here to avoid paying the compile-time cost of calling both // computeKnownBits and ComputeNumSignBits. This restriction can be lifted // if needed. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. 
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); if (Ones != ~Zeros + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S, return setRange(S, SignHint, ConservativeResult); } -/// createSCEV - We know that there is no SCEV for the specified value. -/// Analyze the expression. +SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { + if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap; + const BinaryOperator *BinOp = cast<BinaryOperator>(V); + + // Return early if there are no flags to propagate to the SCEV. + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (BinOp->hasNoUnsignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (BinOp->hasNoSignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + if (Flags == SCEV::FlagAnyWrap) { + return SCEV::FlagAnyWrap; + } + + // Here we check that BinOp is in the header of the innermost loop + // containing BinOp, since we only deal with instructions in the loop + // header. The actual loop we need to check later will come from an add + // recurrence, but getting that requires computing the SCEV of the operands, + // which can be expensive. This check we can do cheaply to rule out some + // cases early. + Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent()); + if (innermostContainingLoop == nullptr || + innermostContainingLoop->getHeader() != BinOp->getParent()) + return SCEV::FlagAnyWrap; + + // Only proceed if we can prove that BinOp does not yield poison. + if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap; + + // At this point we know that if V is executed, then it does not wrap + // according to at least one of NSW or NUW. If V is not executed, then we do + // not know if the calculation that V represents would wrap. Multiple + // instructions can map to the same SCEV. If we apply NSW or NUW from V to + // the SCEV, we must guarantee no wrapping for that SCEV also when it is + // derived from other instructions that map to the same SCEV. We cannot make + // that guarantee for cases where V is not executed. So we need to find the + // loop that V is considered in relation to and prove that V is executed for + // every iteration of that loop. That implies that the value that V + // calculates does not wrap anywhere in the loop, so then we can apply the + // flags to the SCEV. + // + // We check isLoopInvariant to disambiguate in case we are adding two + // recurrences from different loops, so that we know which loop to prove + // that V is executed in. 
+ for (int OpIndex = 0; OpIndex < 2; ++OpIndex) { + const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex)); + if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { + const int OtherOpIndex = 1 - OpIndex; + const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex)); + if (isLoopInvariant(OtherOp, AddRec->getLoop()) && + isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop())) + return Flags; + } + } + return SCEV::FlagAnyWrap; +} + +/// createSCEV - We know that there is no SCEV for the specified value. Analyze +/// the expression. /// const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) @@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. - if (!DT->isReachableFromEntry(I->getParent())) + if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(V); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) Opcode = CE->getOpcode(); else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); else if (isa<ConstantPointerNull>(V)) - return getConstant(V->getType(), 0); + return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); else @@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. - // - // Don't apply this instruction's NSW or NUW flags to the new - // expression. The instruction may be guarded by control flow that the - // no-wrap behavior depends on. Non-control-equivalent instructions can be - // mapped to the same SCEV expression, and it would be incorrect to transfer - // NSW/NUW semantics to those operations. SmallVector<const SCEV *, 4> AddOps; - AddOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { - unsigned Opcode = Op->getValueID() - Value::InstructionVal; - if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast<Operator>(Op); + unsigned Opcode = U ? U->getOpcode() : 0; + if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) { + assert(Op != V && "V should be an add"); + AddOps.push_back(getSCEV(Op)); + break; + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + AddOps.push_back(OpSCEV); + break; + } + + // If a NUW or NSW flag can be applied to the SCEV for this + // addition, then compute the SCEV for this addition by itself + // with a separate call to getAddExpr. We need to do that + // instead of pushing the operands of the addition onto AddOps, + // since the flags are only known to apply to this particular + // addition - they may not apply to other additions that can be + // formed with operands from AddOps. 
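The caveat in the comment above is concrete: a no-signed-wrap fact proven for one particular addition does not carry over to other sums that can be regrouped from the gathered operands. A small numeric illustration with hypothetical i8-sized operands; the narrowing cast models 8-bit wrapping:

#include <cassert>
#include <cstdint>

int main() {
  const int A = 100, B = -50, C = 100;
  // A + B = 50 stays in range, so an 'add nsw' for it is justified ...
  assert(static_cast<int8_t>(A + B) == 50);
  // ... but the flattened sum A + B + C = 150 wraps at 8 bits, so the
  // combined SCEV (A + B + C) must not inherit the nsw flag.
  assert(static_cast<int8_t>(A + B + C) == -106);
  return 0;
}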
+ const SCEV *RHS = getSCEV(U->getOperand(1)); + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); + else + AddOps.push_back(getAddExpr(LHS, RHS, Flags)); break; - U = cast<Operator>(Op); - const SCEV *Op1 = getSCEV(U->getOperand(1)); + } + if (Opcode == Instruction::Sub) - AddOps.push_back(getNegativeSCEV(Op1)); + AddOps.push_back(getNegativeSCEV(RHS)); else - AddOps.push_back(Op1); + AddOps.push_back(RHS); } - AddOps.push_back(getSCEV(U->getOperand(0))); return getAddExpr(AddOps); } + case Instruction::Mul: { - // Don't transfer NSW/NUW for the same reason as AddExpr. SmallVector<const SCEV *, 4> MulOps; - MulOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); - Op->getValueID() == Instruction::Mul + Value::InstructionVal; - Op = U->getOperand(0)) { - U = cast<Operator>(Op); + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast<Operator>(Op); + if (!U || U->getOpcode() != Instruction::Mul) { + assert(Op != V && "V should be a mul"); + MulOps.push_back(getSCEV(Op)); + break; + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + MulOps.push_back(OpSCEV); + break; + } + + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1)), Flags)); + break; + } + MulOps.push_back(getSCEV(U->getOperand(1))); } - MulOps.push_back(getSCEV(U->getOperand(0))); return getMulExpr(MulOps); } case Instruction::UDiv: return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Sub: - return getMinusSCEV(getSCEV(U->getOperand(0)), - getSCEV(U->getOperand(1))); + return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)), + getNoWrapFlagsFromUB(U)); case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. @@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(), + 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (SA->getValue().uge(BitWidth)) break; + // It is currently not resolved how to interpret NSW for left + // shift by BitWidth - 1, so we avoid applying flags in that + // case. Remove this check (or this comment) once the situation + // is resolved. See + // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html + // and http://reviews.llvm.org/D8890 . + auto Flags = SCEV::FlagAnyWrap; + if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U); + Constant *X = ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); - return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags); } break; @@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return createNodeForPHI(cast<PHINode>(U)); case Instruction::Select: - // This could be a smax or umax that was lowered earlier. 
- // Try to recover it. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - switch (ICI->getPredicate()) { - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - // a >s b ? a+x : b+x -> smax(a, b)+x - // a >s b ? b+x : a+x -> smin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getSMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getSMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - // a >u b ? a+x : b+x -> umax(a, b)+x - // a >u b ? b+x : a+x -> umin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_NE: - // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, One); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - case ICmpInst::ICMP_EQ: - // n == 0 ? 1+x : n+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, One); - const SCEV *RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - default: - break; - } - } + // U can also be a select constant expr, which we let fall through. Since + // createNodeForSelect only works for a condition that is an `ICmpInst`, and + // constant expressions cannot have instructions as operands, we'd have + // returned getUnknown for a select constant expression anyway.
+ if (isa<Instruction>(U)) + return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), + U->getOperand(1), U->getOperand(2)); default: // We cannot analyze this expression. break; @@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L, return 1; // Get the trip count from the BE count by adding 1. - const SCEV *TCMul = getAddExpr(ExitCount, - getConstant(ExitCount->getType(), 1)); + const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType())); // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt // to factor simple cases. if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) @@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Pair.second) return Pair.first->second; - // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. - BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + BackedgeTakenInfo Result = computeBackedgeTakenCount(L); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && @@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { } // Re-lookup the insert position, since the call to - // ComputeBackedgeTakenCount above could result in a + // computeBackedgeTakenCount above could result in a // recursive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. @@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. A computable result can only be return for loops with a single exit. -/// Returning the minimum taken count among all exits is incorrect because one -/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that -/// the limit of each loop test is never skipped. This is a valid assumption as -/// long as the loop exits via that test. For precise results, it is the -/// caller's responsibility to specify the relevant loop exit using +/// exits. A computable result can only be returned for loops with a single +/// exit. Returning the minimum taken count among all exits is incorrect +/// because one of the loop's exit limits may have been skipped. HowFarToZero +/// assumes that the limit of each loop test is never skipped. This is a valid +/// assumption as long as the loop exits via that test. For precise results, it +/// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { @@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() { delete[] ExitNotTaken.getNextExit(); } -/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // and compute maxBECount.
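As a concrete reading of the trip-count arithmetic above: the backedge-taken count is one less than the number of times the body runs, which is why the multiple is computed from ExitCount + 1. A hypothetical loop:

void fill(int *P, int N) {
  // For N > 0: the body runs N times, the backedge is taken N - 1 times,
  // so the trip count used above is (N - 1) + 1 == N.
  for (int I = 0; I < N; ++I)
    P[I] = 0;
}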
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = ComputeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is // considered greater than any computable EL.Max. if (EL.Max != getCouldNotCompute() && Latch && - DT->dominates(ExitBB, Latch)) { + DT.dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) MustExitMaxBECount = EL.Max; else { @@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } -/// ComputeExitLimit - Compute the number of times the backedge of the specified -/// loop will execute if it exits via the specified block. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { - // Okay, we've chosen an exiting block. See what condition causes us to - // exit at this block and remember the exit block and whether all other targets + // Okay, we've chosen an exiting block. See what condition causes us to exit + // at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; BasicBlock *Exit = nullptr; @@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (!Pred) return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); - for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { - BasicBlock *PredSucc = PredTerm->getSuccessor(i); + for (const BasicBlock *PredSucc : PredTerm->successors()) { if (PredSucc == BB) continue; // If the predecessor has a successor that isn't BB and isn't @@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (BranchInst *BI = dyn_cast<BranchInst>(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. - return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), + return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), /*ControlsExit=*/IsOnlyExit); } if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) - return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, + return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } -/// ComputeExitLimitFromCond - Compute the number of times the +/// computeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. /// @@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { /// condition is true and can infer that failing to meet the condition prior to /// integer wraparound results in undefined behavior. 
ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, +ScalarEvolution::computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, return getCouldNotCompute(); else // The backedge is never taken. - return getConstant(CI->getType(), 0); + return getZero(CI->getType()); } // If it's not an integer or pointer comparison then compute it the hard way. - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } -/// ComputeExitLimitFromICmp - Compute the number of times the -/// backedge of the specified loop will execute if its exit condition -/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. 
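The And/Or recursion above corresponds to exit branches whose condition combines two comparisons. In the hypothetical loop below either clause can terminate the loop, and combining the two per-clause limits with an unsigned minimum yields the overall backedge-taken count (min(N, M) - 1 for positive N and M):

void scan(const int *Data, int N, int M) {
  // Each clause alone gives an exit limit of N - 1 resp. M - 1 backedges;
  // the conjunction exits at whichever bound is reached first.
  for (int I = 0; I < N && I < M; ++I)
    (void)Data[I];
}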
ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, +ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { ExitLimit ItCnt = - ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + computeLoadConstantCompareExitLimit(LI, RHS, L, Cond); if (ItCnt.hasAnyInfo()) return ItCnt; } + ExitLimit ShiftEL = computeShiftCompareExitLimit( + ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond); + if (ShiftEL.hasAnyInfo()) + return ShiftEL; + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); @@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (AddRec->getLoop() == L) { // Form the constant range. ConstantRange CompRange( - ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + ICmpInst::makeConstantRange(Cond, RHSC->getAPInt())); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; @@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, break; } default: -#if 0 - dbgs() << "ComputeBackedgeTakenCount "; - if (ExitCond->getOperand(0)->getType()->isUnsigned()) - dbgs() << "[unsigned] "; - dbgs() << *LHS << " " - << Instruction::getOpcodeName(Instruction::ICmp) - << " " << *RHS << "\n"; -#endif break; } - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, +ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsExit) { @@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast<SCEVConstant>(Val)->getValue(); } -/// ComputeLoadConstantCompareExitLimit - Given an exit condition of +/// computeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeLoadConstantCompareExitLimit( +ScalarEvolution::computeLoadConstantCompareExitLimit( LoadInst *LI, Constant *RHS, const Loop *L, @@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Result = ConstantExpr::getICmp(predicate, Result, RHS); if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure if (cast<ConstantInt>(Result)->getValue().isMinValue()) { -#if 0 - dbgs() << "\n***\n*** Computed loop count " << *ItCst - << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() - << "***\n"; -#endif ++NumArrayLenItCounts; return getConstant(ItCst); // Found terminating iteration! 
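The load-constant-compare case handled above corresponds to loops that scan a constant global until an element matches; because each iteration's load folds to a constant, the exit test can be evaluated iteration by iteration. A hypothetical example that would fold to an exact backedge-taken count of 4:

static const int Table[8] = {3, 1, 4, 1, 5, 9, 2, 6};

int findFive() {
  int I = 0;
  while (Table[I] != 5) // icmp of a load from a constant global vs. a constant
    ++I;
  return I; // Table[4] == 5, so the backedge is taken 4 times
}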
} @@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); } +ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( + Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { + ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); + if (!RHS) + return getCouldNotCompute(); + + const BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return getCouldNotCompute(); + + const BasicBlock *Predecessor = L->getLoopPredecessor(); + if (!Predecessor) + return getCouldNotCompute(); + + // Return true if V is of the form "LHS `shift_op` <positive constant>". + // Return LHS in OutLHS and the shift opcode in OutOpCode. + auto MatchPositiveShift = + [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { + + using namespace PatternMatch; + + ConstantInt *ShiftAmt; + if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::LShr; + else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::AShr; + else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::Shl; + else + return false; + + return ShiftAmt->getValue().isStrictlyPositive(); + }; + + // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in + // + // loop: + // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] + // %iv.shifted = lshr i32 %iv, <positive constant> + // + // Return true on a successful match. Return the corresponding PHI node (%iv + // above) in PNOut and the opcode of the shift operation in OpCodeOut. + auto MatchShiftRecurrence = + [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { + Optional<Instruction::BinaryOps> PostShiftOpCode; + + { + Instruction::BinaryOps OpC; + Value *V; + + // If we encounter a shift instruction, "peel off" the shift operation, + // and remember that we did so. Later when we inspect %iv's backedge + // value, we will make sure that the backedge value uses the same + // operation. + // + // Note: the peeled shift operation does not have to be the same + // instruction as the one feeding into the PHI's backedge value. We only + // really care about it being the same *kind* of shift instruction -- + // that's all that is required for our later inferences to hold. + if (MatchPositiveShift(LHS, V, OpC)) { + PostShiftOpCode = OpC; + LHS = V; + } + } + + PNOut = dyn_cast<PHINode>(LHS); + if (!PNOut || PNOut->getParent() != L->getHeader()) + return false; + + Value *BEValue = PNOut->getIncomingValueForBlock(Latch); + Value *OpLHS; + + return + // The backedge value for the PHI node must be a shift by a positive + // amount + MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && + + // of the PHI node itself + OpLHS == PNOut && + + // and the kind of shift should match the kind of shift we peeled + // off, if any. + (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); + }; + + PHINode *PN; + Instruction::BinaryOps OpCode; + if (!MatchShiftRecurrence(LHS, PN, OpCode)) + return getCouldNotCompute(); + + const DataLayout &DL = getDataLayout(); + + // The key rationale for this optimization is that for some kinds of shift + // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 + // within a finite number of iterations.
If the condition guarding the + // backedge (in the sense that the backedge is taken if the condition is true) + // is false for the value the shift recurrence stabilizes to, then we know + // that the backedge is taken only a finite number of times. + + ConstantInt *StableValue = nullptr; + switch (OpCode) { + default: + llvm_unreachable("Impossible case!"); + + case Instruction::AShr: { + // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most + // bitwidth(K) iterations. + Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); + bool KnownZero, KnownOne; + ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); + auto *Ty = cast<IntegerType>(RHS->getType()); + if (KnownZero) + StableValue = ConstantInt::get(Ty, 0); + else if (KnownOne) + StableValue = ConstantInt::get(Ty, -1, true); + else + return getCouldNotCompute(); + + break; + } + case Instruction::LShr: + case Instruction::Shl: + // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>} + // stabilize to 0 in at most bitwidth(K) iterations. + StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); + break; + } + + auto *Result = + ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); + assert(Result->getType()->isIntegerTy(1) && + "Otherwise cannot be an operand to a branch instruction"); + + if (Result->isZeroValue()) { + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *UpperBound = + getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); + return ExitLimit(getCouldNotCompute(), UpperBound); + } + + return getCouldNotCompute(); +} /// CanConstantFold - Return true if we can constant fold an instruction of the /// specified type, assuming that all operands were constants. @@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = nullptr; - for (Instruction::op_iterator OpI = UseInst->op_begin(), - OpE = UseInst->op_end(); OpI != OpE; ++OpI) { - - if (isa<Constant>(*OpI)) continue; + for (Value *Op : UseInst->operands()) { + if (isa<Constant>(Op)) continue; - Instruction *OpInst = dyn_cast<Instruction>(*OpI); + Instruction *OpInst = dyn_cast<Instruction>(Op); if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast<PHINode>(OpInst); @@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast<Instruction>(V); if (!I || !canConstantEvolve(I, L)) return nullptr; - if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (PHINode *PN = dyn_cast<PHINode>(I)) return PN; - } // Record non-constant instructions contained by the loop. DenseMap<Instruction *, PHINode *> PHIMap; @@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, TLI); } + +// If every incoming value to PN except the one for BB is a specific Constant, +// return that, else return nullptr. 
+static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { + Constant *IncomingVal = nullptr; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingBlock(i) == BB) + continue; + + auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i)); + if (!CurrentVal) + return nullptr; + + if (IncomingVal != CurrentVal) { + if (IncomingVal) + return nullptr; + IncomingVal = CurrentVal; + } + } + + return IncomingVal; +} + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence @@ -5462,8 +5946,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - DenseMap<PHINode*, Constant*>::const_iterator I = - ConstantEvolutionLoopExitValue.find(PN); + auto I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // Since the loop is canonicalized, the PHI node must have two entries. One - // entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. - bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast<PHINode>(I)); ++I) { - Constant *StartCST = - dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return nullptr; + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast<PHINode>(&I); + if (!PHI) break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return RetVal = nullptr; - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + Value *BEValue = PN->getIncomingValueForBlock(Latch); // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) @@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (; ; ++IterationNum) { if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! @@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap<Instruction *, Constant *> NextIterVals; Constant *NextPHI = - EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. 
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; - for (DenseMap<Instruction *, Constant *>::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast<PHINode>(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast<PHINode>(I.first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; - PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + PHIsToCompute.emplace_back(PHI, I.second); } // We use two distinct loops because EvaluateExpression may invalidate any // iterators into CurrentIterVals. - for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator - I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = I->first; + for (const auto &I : PHIsToCompute) { + PHINode *PHI = I.first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } - if (NextPHI != I->second) + if (NextPHI != I.second) StoppedEvolving = false; } @@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, } } -/// ComputeExitCountExhaustively - If the loop is known to execute a -/// constant number of times (the condition evolves only from constants), -/// try to evaluate a few iterations of the loop until we get the exit -/// condition gets a value of ExitWhen (true or false). If we cannot -/// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, +const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); @@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // One entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. - bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast<PHINode>(I)); ++I) { - Constant *StartCST = - dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Should follow from NumIncomingValues == 2!"); + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast<PHINode>(&I); + if (!PHI) + break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } @@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // the loop symbolically to determine when the condition gets a value of // "ExitWhen". unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ - ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>( - EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI)); + auto *CondVal = dyn_cast_or_null<ConstantInt>( + EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI)); // Couldn't symbolically evaluate. 
if (!CondVal) return getCouldNotCompute(); @@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // calling EvaluateExpression on them because that may invalidate iterators // into CurrentIterVals. SmallVector<PHINode *, 8> PHIsToCompute; - for (DenseMap<Instruction *, Constant *>::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast<PHINode>(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast<PHINode>(I.first); if (!PHI || PHI->getParent() != Header) continue; PHIsToCompute.push_back(PHI); } - for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(), - E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = *I; + for (PHINode *PHI : PHIsToCompute) { Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = + ValuesAtScopes[V]; // Check to see if we've folded this expression at this loop before. - SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == L) - return Values[u].second ? Values[u].second : V; - } - Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr))); + for (auto &LS : Values) + if (LS.first == L) + return LS.second ? LS.second : V; + + Values.emplace_back(L, nullptr); + // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); - SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == L) { - Values2[u - 1].second = C; + for (auto &LS : reverse(ValuesAtScopes[V])) + if (LS.first == L) { + LS.second = C; break; } - } return C; } @@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // exit value from the loop without using SCEVs. if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { - const Loop *LI = (*this->LI)[I->getParent()]; + const Loop *LI = this->LI[I->getParent()]; if (LI && LI->getParentLoop() == L) // Looking for loop exit value. if (PHINode *PN = dyn_cast<PHINode>(I)) if (PN->getParent() == LI->getHeader()) { @@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. 
- Constant *RV = getConstantEvolutionLoopExitValue(PN, - BTCC->getValue()->getValue(), - LI); + Constant *RV = + getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); if (RV) return getSCEV(RV); } } @@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (CanConstantFold(I)) { SmallVector<Constant *, 4> Operands; bool MadeImprovement = false; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { if (Constant *C = dyn_cast<Constant>(Op)) { Operands.push_back(C); continue; @@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { Constant *C = nullptr; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], DL, TLI); + Operands[1], DL, &TLI); else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, - DL, TLI); + DL, &TLI); if (!C) return V; return getSCEV(C); } @@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(CNC, CNC); } - uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); - const APInt &L = LC->getValue()->getValue(); - const APInt &M = MC->getValue()->getValue(); - const APInt &N = NC->getValue()->getValue(); + uint32_t BitWidth = LC->getAPInt().getBitWidth(); + const APInt &L = LC->getAPInt(); + const APInt &M = MC->getAPInt(); + const APInt &N = NC->getAPInt(); APInt Two(BitWidth, 2); APInt Four(BitWidth, 4); @@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); if (R1 && R2) { -#if 0 - dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 - << " sol#2: " << *R2 << "\n"; -#endif // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, @@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // For negative steps (counting down to zero): // N = Start/-Step // First compute the unsigned distance from zero in the direction of Step. - bool CountDown = StepC->getValue()->getValue().isNegative(); + bool CountDown = StepC->getAPInt().isNegative(); const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); // Handle unitary steps, which cannot wrap around. @@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // done by counting and comparing the number of trailing zeros of Step and // Distance. if (!CountDown) { - const APInt &StepV = StepC->getValue()->getValue(); + const APInt &StepV = StepC->getAPInt(); // StepV.isPowerOf2() returns true if StepV is a positive power of two. It // also returns true if StepV is maximally negative (e.g., INT_MIN), but that // case is not handled as this code is guarded by !CountDown.
if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, Step); + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { + // Here we've constrained the equation to be of the form + // + // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) + // + // where we're operating on a W bit wide integer domain and k is + // non-negative. The smallest unsigned solution for X is the trip count. + // + // (0) is equivalent to: + // + // 2^(N + k) * Distance' - 2^N * X = L * 2^W + // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N + // <=> 2^k * Distance' - X = L * 2^(W - N) + // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) + // + // The smallest X satisfying (1) is the unsigned remainder of dividing the + // LHS by 2^(W - N). + // + // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) + // + // E.g. say we're solving + // + // 2 * Val = 2 * X (in i8) ... (3) + // + // then from (2), we get X = Val URem i8 128 (k = 0 in this case). + // + // Note: It is tempting to solve (3) by setting X = Val, but Val is not + // necessarily the smallest unsigned value of X that satisfies (3). + // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) + // is i8 1, not i8 -127. + + const auto *ModuloResult = getUDivExactExpr(Distance, Step); + + // Since SCEV does not have a URem node, we construct one using a truncate + // and a zero extend. + + unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); + auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); + auto *WideTy = Distance->getType(); + + return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + } } // If the condition controls loop exit (the loop exits only if the expression @@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) - return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), - -StartC->getValue()->getValue(), + return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), *this); return getCouldNotCompute(); } @@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { if (!C->getValue()->isNullValue()) - return getConstant(C->getType(), 0); + return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. - if (Loop *L = LI->getLoopFor(BB)) + if (Loop *L = LI.getLoopFor(BB)) return std::make_pair(L->getLoopPredecessor(), L->getHeader()); return std::pair<BasicBlock *, BasicBlock *>(); @@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; + auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { + // Not all instructions that are "identical" compute the same value.
For + // instance, two distinct alloca instructions allocating the same type are + // identical and do not read memory, but compute distinct values. + return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A)); + }; + // Otherwise, if they're both SCEVUnknown, it's possible that they hold // two different instructions with the same value. Check for this case. if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) - if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + if (ComputesEqualValues(AI, BI)) return true; // Otherwise assume they may have a different value. @@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // If there's a constant operand, canonicalize comparisons with boundary // cases, and canonicalize *-or-equal comparisons to regular comparisons. if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { - const APInt &RA = RC->getValue()->getValue(); + const APInt &RA = RC->getAPInt(); switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: @@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { - LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - SCEV::FlagNUW); + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { - RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - SCEV::FlagNUW); + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { @@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, if (LeftGuarded && RightGuarded) return true; + if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) + return true; + // Otherwise see what can be done with known constant ranges. return isKnownPredicateWithRanges(Pred, LHS, RHS); } +bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); + +#ifndef NDEBUG + // Verify an invariant: inverting the predicate should turn a monotonically + // increasing change to a monotonically decreasing one, and vice versa. + bool IncreasingSwapped; + bool ResultSwapped = isMonotonicPredicateImpl( + LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); + + assert(Result == ResultSwapped && "should be able to analyze both!"); + if (ResultSwapped) + assert(Increasing == !IncreasingSwapped && + "monotonicity should flip as we flip the predicate"); +#endif + + return Result; +} + +bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + + // A zero step value for LHS means the induction variable is essentially a + // loop invariant value.
We don't really depend on the predicate actually + // flipping from false to true (for increasing predicates, and the other way + // around for decreasing predicates); all we care about is that *if* the + // predicate changes then it only changes from false to true. + // + // A zero step value in itself is not very useful, but there may be places + // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be + // as general as possible. + + switch (Pred) { + default: + return false; // Conservative answer + + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (!LHS->getNoWrapFlags(SCEV::FlagNUW)) + return false; + + Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE; + return true; + + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!LHS->getNoWrapFlags(SCEV::FlagNSW)) + return false; + + const SCEV *Step = LHS->getStepRecurrence(*this); + + if (isKnownNonNegative(Step)) { + Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE; + return true; + } + + if (isKnownNonPositive(Step)) { + Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE; + return true; + } + + return false; + } + + } + + llvm_unreachable("switch has default clause!"); +} + +bool ScalarEvolution::isLoopInvariantPredicate( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS) { + + // If there is a loop-invariant, force it into the RHS, otherwise bail out. + if (!isLoopInvariant(RHS, L)) { + if (!isLoopInvariant(LHS, L)) + return false; + + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS); + if (!ArLHS || ArLHS->getLoop() != L) + return false; + + bool Increasing; + if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) + return false; + + // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to + // true as the loop iterates, and the backedge is control dependent on + // "ArLHS `Pred` RHS" == true then we can reason as follows: + // + // * if the predicate was false in the first iteration then the predicate + // is never evaluated again, since the loop exits without taking the + // backedge. + // * if the predicate was true in the first iteration then it will + // continue to be true for all future iterations since it is + // monotonically increasing. + // + // For both the above possibilities, we can replace the loop varying + // predicate with its value on the first iteration of the loop (which is + // loop invariant). + // + // Similar reasoning applies for a monotonically decreasing predicate, by + // replacing true with false and false with true in the above two bullets. + + auto P = Increasing ?
Pred : ICmpInst::getInversePredicate(Pred); + + if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) + return false; + + InvariantPred = Pred; + InvariantLHS = ArLHS->getStart(); + InvariantRHS = RHS; + return true; +} + bool ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { @@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, return false; } +bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + + // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer. + // Return Y via OutY. + auto MatchBinaryAddToConst = + [this](const SCEV *Result, const SCEV *X, APInt &OutY, + SCEV::NoWrapFlags ExpectedFlags) { + const SCEV *NonConstOp, *ConstOp; + SCEV::NoWrapFlags FlagsPresent; + + if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) || + !isa<SCEVConstant>(ConstOp) || NonConstOp != X) + return false; + + OutY = cast<SCEVConstant>(ConstOp)->getAPInt(); + return (FlagsPresent & ExpectedFlags) == ExpectedFlags; + }; + + APInt C; + + switch (Pred) { + default: + break; + + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: + // X s<= (X + C)<nsw> if C >= 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) + return true; + + // (X + C)<nsw> s<= X if C <= 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && + !C.isStrictlyPositive()) + return true; + break; + + case ICmpInst::ICMP_SGT: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: + // X s< (X + C)<nsw> if C > 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && + C.isStrictlyPositive()) + return true; + + // (X + C)<nsw> s< X if C < 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()) + return true; + break; + } + + return false; +} + +bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) + return false; + + // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting + // on the stack can result in exponential time complexity. + SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); + + // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L + // + // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use + // isKnownPredicate. isKnownPredicate is more powerful, but also more + // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the + // interesting cases seen in practice. We can consider "upgrading" L >= 0 to + // use isKnownPredicate later if needed. + return isKnownNonNegative(RHS) && + isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && + isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); +} + /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is /// protected by a conditional between LHS and RHS. This is used /// to eliminate casts. @@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->getSuccessor(0) != L->getHeader())) return true; + // We don't want more than one activation of the following loops on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true); + + // See if we can exploit a trip count to prove the predicate.
+ const auto &BETakenInfo = getBackedgeTakenInfo(L); + const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); + if (LatchBECount != getCouldNotCompute()) { + // We know that Latch branches back to the loop header exactly + // LatchBECount times. This means the backedge condition at Latch is + // equivalent to "{0,+,1} u< LatchBECount". + Type *Ty = LatchBECount->getType(); + auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); + const SCEV *LoopCounter = + getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); + if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, + LatchBECount)) + return true; + } + // Check conditions due to any @llvm.assume intrinsics. - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast<CallInst>(AssumeVH); - if (!DT->dominates(CI, Latch->getTerminator())) + if (!DT.dominates(CI, Latch->getTerminator())) continue; if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) return true; } - struct ClearWalkingBEDominatingCondsOnExit { - ScalarEvolution &SE; - - explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) - : SE(SE){}; - - ~ClearWalkingBEDominatingCondsOnExit() { - SE.WalkingBEDominatingConds = false; - } - }; - - // We don't want more than one activation of the following loop on the stack - // -- that can lead to O(n!) time complexity. - if (WalkingBEDominatingConds) - return false; - - WalkingBEDominatingConds = true; - ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); - // If the loop is not reachable from the entry block, we risk running into an // infinite loop as we walk up into the dom tree. These loops do not matter // anyway, so we just return a conservative answer when we see them. - if (!DT->isReachableFromEntry(L->getHeader())) + if (!DT.isReachableFromEntry(L->getHeader())) return false; - for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; - DTN != HeaderDTN; - DTN = DTN->getIDom()) { + for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; + DTN != HeaderDTN; DTN = DTN->getIDom()) { assert(DTN && "should reach the loop header before reaching the root!"); @@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, // We're constructively (and conservatively) enumerating edges within the // loop body that dominate the latch. The dominator tree better agree // with us on this: - assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + assert(DT.dominates(DominatingEdge, Latch) && "should be!"); if (isImpliedCond(Pred, LHS, RHS, Condition, BB != ContinuePredicate->getSuccessor(0))) @@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics. - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast<CallInst>(AssumeVH); - if (!DT->dominates(CI, L->getHeader())) + if (!DT.dominates(CI, L->getHeader())) continue; if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) @@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } +namespace { /// RAII wrapper to prevent recursive application of isImpliedCond. /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are /// currently evaluating isImpliedCond.
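A note on isKnownPredicateViaSplitting above: the split it relies on -- if L >= 0 then (I u< L) <=> (I >= 0 && I s< L) -- is easy to sanity-check exhaustively at a narrow bit width. A minimal standalone sketch in plain C++ (illustrative only, not part of the patch):

// Exhaustive check of the splitting identity on 8-bit integers:
// if L >= 0 (signed) then (I u< L) <=> (I s>= 0 && I s< L).
#include <cassert>
#include <cstdint>

int main() {
  for (int i = -128; i <= 127; ++i)
    for (int l = 0; l <= 127; ++l) { // L >= 0 is the precondition
      bool Ult = static_cast<uint8_t>(i) < static_cast<uint8_t>(l);
      bool Split = i >= 0 && i < l;
      assert(Ult == Split);
    }
  return 0;
}

A negative I reinterprets to an unsigned value of 128 or more, so I u< L fails exactly when the signed conjunction does.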
@@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate { LoopPreds.erase(Cond); } }; +} // end anonymous namespace /// isImpliedCond - Test whether the condition described by Pred, LHS, /// and RHS is true whenever the given Cond value evaluates to true. @@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); +} + +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, + ICmpInst::Predicate FoundPred, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, RHS, LHS, FoundLHS, FoundRHS); } + // Unsigned comparison is the same as signed comparison when both the operands + // are non-negative. + if (CmpInst::isUnsigned(FoundPred) && + CmpInst::getSignedPredicate(FoundPred) == Pred && + isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) { @@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, APInt Min = ICmpInst::isSigned(Pred) ? getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin(); - if (Min == C->getValue()->getValue()) { + if (Min == C->getAPInt()) { // Given (V >= Min && V != Min) we conclude V >= (Min + 1). // This is true even if (Min + 1) wraps around -- in case of // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). @@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, return false; } +bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, + const SCEV *&L, const SCEV *&R, + SCEV::NoWrapFlags &Flags) { + const auto *AE = dyn_cast<SCEVAddExpr>(Expr); + if (!AE || AE->getNumOperands() != 2) + return false; + + L = AE->getOperand(0); + R = AE->getOperand(1); + Flags = AE->getNoWrapFlags(); + return true; +} + +bool ScalarEvolution::computeConstantDifference(const SCEV *Less, + const SCEV *More, + APInt &C) { + // We avoid subtracting expressions here because this function is usually + // fairly deep in the call stack (i.e. is called many times). + + if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) { + const auto *LAR = cast<SCEVAddRecExpr>(Less); + const auto *MAR = cast<SCEVAddRecExpr>(More); + + if (LAR->getLoop() != MAR->getLoop()) + return false; + + // We look at affine expressions only; not for correctness but to keep + // getStepRecurrence cheap. 
+ if (!LAR->isAffine() || !MAR->isAffine()) + return false; + + if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) + return false; + + Less = LAR->getStart(); + More = MAR->getStart(); + + // fall through + } + + if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) { + const auto &M = cast<SCEVConstant>(More)->getAPInt(); + const auto &L = cast<SCEVConstant>(Less)->getAPInt(); + C = M - L; + return true; + } + + const SCEV *L, *R; + SCEV::NoWrapFlags Flags; + if (splitBinaryAdd(Less, L, R, Flags)) + if (const auto *LC = dyn_cast<SCEVConstant>(L)) + if (R == More) { + C = -(LC->getAPInt()); + return true; + } + + if (splitBinaryAdd(More, L, R, Flags)) + if (const auto *LC = dyn_cast<SCEVConstant>(L)) + if (R == Less) { + C = LC->getAPInt(); + return true; + } + + return false; +} + +bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS) { + if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT) + return false; + + const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS); + if (!AddRecLHS) + return false; + + const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS); + if (!AddRecFoundLHS) + return false; + + // We'd like to let SCEV reason about control dependencies, so we constrain + // both the inequalities to be about add recurrences on the same loop. This + // way we can use isLoopEntryGuardedByCond later. + + const Loop *L = AddRecFoundLHS->getLoop(); + if (L != AddRecLHS->getLoop()) + return false; + + // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1) + // + // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C) + // ... (2) + // + // Informal proof for (2), assuming (1) [*]: + // + // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**] + // + // Then + // + // FoundLHS s< FoundRHS s< INT_MIN - C + // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ] + // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] + // <=> (FoundLHS + INT_MIN + C + INT_MIN) s< + // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] + // <=> FoundLHS + C s< FoundRHS + C + // + // [*]: (1) can be proved by ruling out overflow. + // + // [**]: This can be proved by analyzing all the four possibilities: + // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and + // (A s>= 0, B s>= 0). + // + // Note: + // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" + // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS + // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS + // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is + // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + + // C)". + + APInt LDiff, RDiff; + if (!computeConstantDifference(FoundLHS, LHS, LDiff) || + !computeConstantDifference(FoundRHS, RHS, RDiff) || + LDiff != RDiff) + return false; + + if (LDiff == 0) + return true; + + APInt FoundRHSLimit; + + if (Pred == CmpInst::ICMP_ULT) { + FoundRHSLimit = -RDiff; + } else { + assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); + FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff; + } + + // Try to prove (1) or (2), as needed. 
+ return isLoopEntryGuardedByCond(L, Pred, FoundRHS, + getConstant(FoundRHSLimit)); +} + /// isImpliedCondOperands - Test whether the condition described by Pred, /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, /// and FoundRHS is true. @@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; + if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y @@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, /// If Expr computes ~A, return A else return nullptr static const SCEV *MatchNotExpr(const SCEV *Expr) { const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); - if (!Add || Add->getNumOperands() != 2) return nullptr; - - const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0)); - if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue())) + if (!Add || Add->getNumOperands() != 2 || + !Add->getOperand(0)->isAllOnesValue()) return nullptr; const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); - if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr; - - const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0)); - if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue())) + if (!AddRHS || AddRHS->getNumOperands() != 2 || + !AddRHS->getOperand(0)->isAllOnesValue()) return nullptr; return AddRHS->getOperand(1); @@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr); if (!MaxExpr) return false; - auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate); - return It != MaxExpr->op_end(); + return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); } @@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE, return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate)); } +static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + + // If both sides are affine addrecs for the same loop, with equal + // steps, and we know the recurrences don't wrap, then we only + // need to check the predicate on the starting values. + + if (!ICmpInst::isRelational(Pred)) + return false; + + const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); + if (!LAR) + return false; + const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); + if (!RAR) + return false; + if (LAR->getLoop() != RAR->getLoop()) + return false; + if (!LAR->isAffine() || !RAR->isAffine()) + return false; + + if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) + return false; + + SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? + SCEV::FlagNSW : SCEV::FlagNUW; + if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)) + return false; + + return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); +} /// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max /// expression?
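Similarly, fact (1) in the isImpliedCondOperandsViaNoOverflow comment above -- A u< B u< -C implies (A + C) u< (B + C) under wrapping arithmetic -- can be verified exhaustively at 8 bits. A standalone sketch, plain C++, illustrative only:

// Verify: for all 8-bit A, B, C, if A u< B and B u< -C (mod 256),
// then (A + C) u< (B + C) with wrapping addition.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned c = 0; c < 256; ++c) {
        uint8_t NegC = static_cast<uint8_t>(256 - c); // -C mod 256
        if (a < b && b < NegC)
          assert(static_cast<uint8_t>(a + c) < static_cast<uint8_t>(b + c));
      }
  return 0;
}

B u< -C is precisely what keeps both sums below 2^W, so neither addition wraps and the ordering survives; the signed variant (2) then follows via the INT_MIN translation described in the comment.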
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, auto IsKnownPredicateFull = [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { return isKnownPredicateWithRanges(Pred, LHS, RHS) || - IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS); + IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || + IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || + isKnownPredicateViaNoOverflow(Pred, LHS, RHS); }; switch (Pred) { @@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, !isa<SCEVConstant>(AddLHS->getOperand(0))) return false; - APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue(); + APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt(); // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the // antecedent "`FoundLHS` `Pred` `FoundRHS`". @@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range // for `LHS`: - APInt Addend = - cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue(); + APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt(); ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend)); // We can also compute the range of values for `LHS` that satisfy the // consequent, "`LHS` `Pred` `RHS`": - APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue(); + APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt(); ConstantRange SatisfyingLHSRange = ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); @@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); - const SCEV *One = getConstant(Stride->getType(), 1); + const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MaxRHS = getSignedRange(RHS).getSignedMax(); @@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, if (NoWrap) return false; unsigned BitWidth = getTypeSizeInBits(RHS->getType()); - const SCEV *One = getConstant(Stride->getType(), 1); + const SCEV *One = getOne(Stride->getType()); if (IsSigned) { APInt MinRHS = getSignedRange(RHS).getSignedMin(); @@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, // stride and presence of the equality in the comparison. const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, bool Equality) { - const SCEV *One = getConstant(Step->getType(), 1); + const SCEV *One = getOne(Step->getType()); Delta = Equality ? 
getAddExpr(Delta, Step) : getAddExpr(Delta, getMinusSCEV(Step, One)); return getUDivExpr(Delta, Step); @@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // overflow, in which case if RHS - Start is a constant, we don't need to // do a max operation since we can just figure it out statically if (NoWrap && isa<SCEVConstant>(Diff)) { - APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue(); + APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt(); if (D.isNegative()) End = Start; } else @@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, // overflow, in which case if RHS - Start is a constant, we don't need to // do a max operation since we can just figure it out statically if (NoWrap && isa<SCEVConstant>(Diff)) { - APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue(); + APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt(); if (!D.isNegative()) End = Start; } else @@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); - Operands[0] = SE.getConstant(SC->getType(), 0); + Operands[0] = SE.getZero(SC->getType()); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), getNoWrapFlags(FlagNW)); - if (const SCEVAddRecExpr *ShiftedAddRec = - dyn_cast<SCEVAddRecExpr>(Shifted)) + if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) return ShiftedAddRec->getNumIterationsInRange( - Range.subtract(SC->getValue()->getValue()), SE); + Range.subtract(SC->getAPInt()), SE); // This is strange and shouldn't happen. return SE.getCouldNotCompute(); } // The only time we can solve this is when we have all constant indices. // Otherwise, we cannot determine the overflow conditions. - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (!isa<SCEVConstant>(getOperand(i))) - return SE.getCouldNotCompute(); - + if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); })) + return SE.getCouldNotCompute(); // Okay at this point we know that all elements of the chrec are constants and // that the start element is zero. @@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) - return SE.getConstant(getType(), 0); + return SE.getZero(getType()); if (isAffine()) { // If this is an affine expression then we have this situation: @@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. APInt One(BitWidth,1); - APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); + APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt(); APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); // The exit value should be (End+A)/A. 
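computeBECount above is a ceiling division in disguise: with Equality false it computes (Delta + Step - 1) /u Step, i.e. ceil(Delta /u Step), and with Equality true (Delta + Step) /u Step, one iteration more. A standalone sketch of the identity in plain C++, assuming Step > 0 and no overflow in the additions:

// Check the ceiling-division identity behind computeBECount.
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t Delta = 0; Delta < 1024; ++Delta)
    for (uint64_t Step = 1; Step < 64; ++Step) {
      uint64_t NonEq = (Delta + Step - 1) / Step;           // Equality == false
      uint64_t Ceil = Delta / Step + (Delta % Step ? 1 : 0);
      assert(NonEq == Ceil);
      // Equality == true: one more than floor(Delta / Step), since an exit
      // comparison that includes equality admits one extra iteration.
      assert((Delta + Step) / Step == Delta / Step + 1);
    }
  return 0;
}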
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, FlagAnyWrap); // Next, solve the constructed addrec - std::pair<const SCEV *,const SCEV *> Roots = - SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); + auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); if (R1) { // Pick the smallest positive root value. - if (ConstantInt *CB = - dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, - R1->getValue(), R2->getValue()))) { + if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp( + ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { if (!CB->getZExtValue()) std::swap(R1, R2); // R1 is the minimum root now. @@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (Range.contains(R1Val->getValue())) { // The next iteration must be out of the range... ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); + ConstantInt::get(SE.getContext(), R1->getAPInt() + 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (!Range.contains(R1Val->getValue())) @@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If R1 was not in the range, then it is a good return value. Make // sure that R1-1 WAS in the range though, just in case. ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + ConstantInt::get(SE.getContext(), R1->getAPInt() - 1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (Range.contains(R1Val->getValue())) return R1; @@ -7644,9 +8551,84 @@ struct SCEVCollectTerms { } bool isDone() const { return false; } }; + +// Check if a SCEV contains an AddRecExpr. +struct SCEVHasAddRec { + bool &ContainsAddRec; + + SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { + ContainsAddRec = false; + } + + bool follow(const SCEV *S) { + if (isa<SCEVAddRecExpr>(S)) { + ContainsAddRec = true; + + // Stop recursion: once we have found an AddRec, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; + +// Find factors that are multiplied with an expression that (possibly as a +// subexpression) contains an AddRecExpr. In the expression: +// +// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) +// +// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" +// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size +// parameters as they form a product with an induction variable. +// +// This collector expects all array size parameters to be in the same MulExpr. +// It might be necessary to later add support for collecting parameters that are +// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies { + SmallVectorImpl<const SCEV *> &Terms; + ScalarEvolution &SE; + + SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE) + : Terms(T), SE(SE) {} + + bool follow(const SCEV *S) { + if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { + bool HasAddRec = false; + SmallVector<const SCEV *, 0> Operands; + for (auto Op : Mul->operands()) { + if (isa<SCEVUnknown>(Op)) { + Operands.push_back(Op); + } else { + bool ContainsAddRec; + SCEVHasAddRec HasAddRecVisitor(ContainsAddRec); + visitAll(Op, HasAddRecVisitor); + HasAddRec |= ContainsAddRec; + } + } + if (Operands.size() == 0) + return true; + + if (!HasAddRec) + return false; + + Terms.push_back(SE.getMulExpr(Operands)); + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; } -/// Find parametric terms in this SCEVAddRecExpr. +/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in +/// two places: +/// 1) The strides of AddRec expressions. +/// 2) Unknowns that are multiplied with AddRec expressions. void ScalarEvolution::collectParametricTerms(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Terms) { SmallVector<const SCEV *, 4> Strides; @@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr, for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); + + SCEVCollectAddRecMultiplies MulCollector(Terms, *this); + visitAll(Expr, MulCollector); } static bool findArrayDimensionsRec(ScalarEvolution &SE, @@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, return true; } -namespace { -struct FindParameter { - bool FoundParameter; - FindParameter() : FoundParameter(false) {} - - bool follow(const SCEV *S) { - if (isa<SCEVUnknown>(S)) { - FoundParameter = true; - // Stop recursion: we found a parameter. - return false; - } - // Keep looking. - return true; - } - bool isDone() const { - // Stop recursion if we have found a parameter. - return FoundParameter; - } -}; -} - // Returns true when S contains at least a SCEVUnknown parameter. static inline bool containsParameters(const SCEV *S) { + struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} + + bool follow(const SCEV *S) { + if (isa<SCEVUnknown>(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; + } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; + } + }; + FindParameter F; SCEVTraversal<FindParameter> ST(F); ST.visitAll(S); @@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - // Divide all terms by the element size. + // Try to divide all terms by the element size. If a term is not divisible by + // element size, proceed with the original term.
for (const SCEV *&Term : Terms) { const SCEV *Q, *R; SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); - Term = Q; + if (!Q->isZero()) + Term = Q; } SmallVector<const SCEV *, 4> NewTerms; @@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions( if (Sizes.empty()) return; - if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr)) + if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr)) if (!AR->isAffine()) return; @@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) // ScalarEvolution Class Implementation //===----------------------------------------------------------------------===// -ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), - LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { - initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); -} - -bool ScalarEvolution::runOnFunction(Function &F) { - this->F = &F; - AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return false; -} - -void ScalarEvolution::releaseMemory() { +ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, + AssumptionCache &AC, DominatorTree &DT, + LoopInfo &LI) + : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), + CouldNotCompute(new SCEVCouldNotCompute()), + WalkingBEDominatingConds(false), ProvingSplitPredicate(false), + ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), + FirstUnknown(nullptr) {} + +ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) + : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), + CouldNotCompute(std::move(Arg.CouldNotCompute)), + ValueExprMap(std::move(Arg.ValueExprMap)), + WalkingBEDominatingConds(false), ProvingSplitPredicate(false), + BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), + ConstantEvolutionLoopExitValue( + std::move(Arg.ConstantEvolutionLoopExitValue)), + ValuesAtScopes(std::move(Arg.ValuesAtScopes)), + LoopDispositions(std::move(Arg.LoopDispositions)), + BlockDispositions(std::move(Arg.BlockDispositions)), + UnsignedRanges(std::move(Arg.UnsignedRanges)), + SignedRanges(std::move(Arg.SignedRanges)), + UniqueSCEVs(std::move(Arg.UniqueSCEVs)), + UniquePreds(std::move(Arg.UniquePreds)), + SCEVAllocator(std::move(Arg.SCEVAllocator)), + FirstUnknown(Arg.FirstUnknown) { + Arg.FirstUnknown = nullptr; +} + +ScalarEvolution::~ScalarEvolution() { // Iterate through all the SCEVUnknown instances and call their // destructors, so that they release their references to their values. - for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) - U->~SCEVUnknown(); + for (SCEVUnknown *U = FirstUnknown; U;) { + SCEVUnknown *Tmp = U; + U = U->Next; + Tmp->~SCEVUnknown(); + } FirstUnknown = nullptr; ValueExprMap.clear(); // Free any extra memory created for ExitNotTakenInfo in the unlikely event // that a loop had multiple computable exits. 
- for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = - BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); - I != E; ++I) { - I->second.clear(); - } + for (auto &BTCI : BackedgeTakenCounts) + BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); - - BackedgeTakenCounts.clear(); - ConstantEvolutionLoopExitValue.clear(); - ValuesAtScopes.clear(); - LoopDispositions.clear(); - BlockDispositions.clear(); - UnsignedRanges.clear(); - SignedRanges.clear(); - UniqueSCEVs.clear(); - SCEVAllocator.Reset(); -} - -void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequiredTransitive<LoopInfoWrapperPass>(); - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); + assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "\n"; } -void ScalarEvolution::print(raw_ostream &OS, const Module *) const { +void ScalarEvolution::print(raw_ostream &OS) const { // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, @@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); OS << "Classifying expressions for: "; - F->printAsOperand(OS, /*PrintType=*/false); + F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { - OS << *I << '\n'; + for (Instruction &I : instructions(F)) + if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { + OS << I << '\n'; OS << " --> "; - const SCEV *SV = SE.getSCEV(&*I); + const SCEV *SV = SE.getSCEV(&I); SV->print(OS); if (!isa<SCEVCouldNotCompute>(SV)) { OS << " U: "; @@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { SE.getSignedRange(SV).print(OS); } - const Loop *L = LI->getLoopFor((*I).getParent()); + const Loop *L = LI.getLoopFor(I.getParent()); const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { @@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { } OS << "Determining loop execution counts for: "; - F->printAsOperand(OS, /*PrintType=*/false); + F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I) PrintLoopInfo(OS, &SE, *I); } @@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { // This recurrence is variant w.r.t. L if any of its operands // are variant. - for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) - if (!isLoopInvariant(*I, L)) + for (auto *Op : AR->operands()) + if (!isLoopInvariant(Op, L)) return LoopVariant; // Otherwise it's loop-invariant. 
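As a sketch of how a client consumes the disposition logic above (assuming a ScalarEvolution result and a loop are already in hand; the helper name is made up for illustration):

    // Hoist a value out of L only when its SCEV is invariant in L.
    static bool canHoistOutOf(ScalarEvolution &SE, Value *V, const Loop *L) {
      if (!SE.isSCEVable(V->getType()))
        return false;
      return SE.isLoopInvariant(SE.getSCEV(V), L);
    }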
@@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { case scMulExpr: case scUMaxExpr: case scSMaxExpr: { - const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); bool HasVarying = false; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - LoopDisposition D = getLoopDisposition(*I, L); + for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { + LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; if (D == LoopComputable) @@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { // invariant if they are not contained in the specified loop. // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". - if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) + if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; return LoopInvariant; case scCouldNotCompute: @@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { // produces the addrec's value is a PHI, and a PHI effectively properly // dominates its entire containing block. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); - if (!DT->dominates(AR->getLoop()->getHeader(), BB)) + if (!DT.dominates(AR->getLoop()->getHeader(), BB)) return DoesNotDominateBlock; } // FALL THROUGH into SCEVNAryExpr handling. @@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { case scSMaxExpr: { const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); bool Proper = true; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - BlockDisposition D = getBlockDisposition(*I, BB); + for (const SCEV *NAryOp : NAry->operands()) { + BlockDisposition D = getBlockDisposition(NAryOp, BB); if (D == DoesNotDominateBlock) return DoesNotDominateBlock; if (D == DominatesBlock) @@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) { if (I->getParent() == BB) return DominatesBlock; - if (DT->properlyDominates(I->getParent(), BB)) + if (DT.properlyDominates(I->getParent(), BB)) return ProperlyDominatesBlock; return DoesNotDominateBlock; } @@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } -namespace { -// Search for a SCEV expression node within an expression tree. -// Implements SCEVTraversal::Visitor. -struct SCEVSearch { - const SCEV *Node; - bool IsFound; +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + // Search for a SCEV expression node within an expression tree. + // Implements SCEVTraversal::Visitor. 
+ struct SCEVSearch { + const SCEV *Node; + bool IsFound; - SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} - bool follow(const SCEV *S) { - IsFound |= (S == Node); - return !IsFound; - } - bool isDone() const { return IsFound; } -}; -} + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; + } + bool isDone() const { return IsFound; } + }; -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { SCEVSearch Search(Op); visitAll(S, Search); return Search.IsFound; @@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) { /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. static void getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { - for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) { - getLoopBackedgeTakenCounts(*I, Map, SE); // recurse. - - std::string &S = Map[L]; - if (S.empty()) { - raw_string_ostream OS(S); - SE.getBackedgeTakenCount(L)->print(OS); + std::string &S = Map[L]; + if (S.empty()) { + raw_string_ostream OS(S); + SE.getBackedgeTakenCount(L)->print(OS); - // false and 0 are semantically equivalent. This can happen in dead loops. - replaceSubString(OS.str(), "false", "0"); - // Remove wrap flags, their use in SCEV is highly fragile. - // FIXME: Remove this when SCEV gets smarter about them. - replaceSubString(OS.str(), "<nw>", ""); - replaceSubString(OS.str(), "<nsw>", ""); - replaceSubString(OS.str(), "<nuw>", ""); - } + // false and 0 are semantically equivalent. This can happen in dead loops. + replaceSubString(OS.str(), "false", "0"); + // Remove wrap flags, their use in SCEV is highly fragile. + // FIXME: Remove this when SCEV gets smarter about them. + replaceSubString(OS.str(), "<nw>", ""); + replaceSubString(OS.str(), "<nsw>", ""); + replaceSubString(OS.str(), "<nuw>", ""); } -} -void ScalarEvolution::verifyAnalysis() const { - if (!VerifySCEV) - return; + for (auto *R : reverse(*L)) + getLoopBackedgeTakenCounts(R, Map, SE); // recurse. +} +void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); // Gather stringified backedge taken counts for all loops using SCEV's caches. // FIXME: It would be much better to store actual values instead of strings, // but SCEV pointers will change if we drop the caches. VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; - for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); - // Gather stringified backedge taken counts for all loops without using - // SCEV's caches. - SE.releaseMemory(); - for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); + // Gather stringified backedge taken counts for all loops using a fresh + // ScalarEvolution object. + ScalarEvolution SE2(F, TLI, AC, DT, LI); + for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2); // Now compare whether they're the same with and without caches. This allows // verifying that no pass changed the cache. @@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const { // TODO: Verify more things. 
} + +char ScalarEvolutionAnalysis::PassID; + +ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, + AnalysisManager<Function> *AM) { + return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F), + AM->getResult<AssumptionAnalysis>(F), + AM->getResult<DominatorTreeAnalysis>(F), + AM->getResult<LoopAnalysis>(F)); +} + +PreservedAnalyses +ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) { + AM->getResult<ScalarEvolutionAnalysis>(F).print(OS); + return PreservedAnalyses::all(); +} + +INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +char ScalarEvolutionWrapperPass::ID = 0; + +ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { + initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { + SE.reset(new ScalarEvolution( + F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), + getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); + return false; +} + +void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); } + +void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const { + SE->print(OS); +} + +void ScalarEvolutionWrapperPass::verifyAnalysis() const { + if (!VerifySCEV) + return; + + SE->verify(); +} + +void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AssumptionCacheTracker>(); + AU.addRequiredTransitive<LoopInfoWrapperPass>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); +} + +const SCEVPredicate * +ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, + const SCEVConstant *RHS) { + FoldingSetNodeID ID; + // Unique this node based on the arguments + ID.AddInteger(SCEVPredicate::P_Equal); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = nullptr; + if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) + return S; + SCEVEqualPredicate *Eq = new (SCEVAllocator) + SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); + UniquePreds.InsertNode(Eq, IP); + return Eq; +} + +namespace { +class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { +public: + static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, + SCEVUnionPredicate &A) { + SCEVPredicateRewriter Rewriter(SE, A); + return Rewriter.visit(Scev); + } + + SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P) + : SCEVRewriteVisitor(SE), P(P) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + auto ExprPreds = P.getPredicatesForExpr(Expr); + for (auto *Pred : ExprPreds) + if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred)) + if (IPred->getLHS() == Expr) + return IPred->getRHS(); + + return Expr; + } + +private: + SCEVUnionPredicate &P; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev, + SCEVUnionPredicate &Preds) { + return SCEVPredicateRewriter::rewrite(Scev, *this, Preds); +} + 
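A minimal sketch of how a client drives the predicated interface implemented below (SE, Ptr, U, and C stand for values a real caller would already have; the function is illustrative, not code from this patch):

    // Speculate that the unknown U equals the constant C, then ask for the
    // SCEV of Ptr rewritten under that assumption.
    static const SCEV *sketchRewrite(ScalarEvolution &SE, Value *Ptr,
                                     const SCEVUnknown *U,
                                     const SCEVConstant *C) {
      PredicatedScalarEvolution PSE(SE);
      PSE.addPredicate(*SE.getEqualPredicate(U, C));
      return PSE.getSCEV(Ptr); // occurrences of U are replaced by C
    }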
+/// SCEV predicates +SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, + SCEVPredicateKind Kind) + : FastID(ID), Kind(Kind) {} + +SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, + const SCEVUnknown *LHS, + const SCEVConstant *RHS) + : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {} + +bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { + const auto *Op = dyn_cast<const SCEVEqualPredicate>(N); + + if (!Op) + return false; + + return Op->LHS == LHS && Op->RHS == RHS; +} + +bool SCEVEqualPredicate::isAlwaysTrue() const { return false; } + +const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; } + +void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const { + OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; +} + +/// Union predicates don't get cached, so create a dummy set ID for them. +SCEVUnionPredicate::SCEVUnionPredicate() + : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} + +bool SCEVUnionPredicate::isAlwaysTrue() const { + return all_of(Preds, + [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); +} + +ArrayRef<const SCEVPredicate *> +SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { + auto I = SCEVToPreds.find(Expr); + if (I == SCEVToPreds.end()) + return ArrayRef<const SCEVPredicate *>(); + return I->second; +} + +bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { + if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) + return all_of(Set->Preds, + [this](const SCEVPredicate *I) { return this->implies(I); }); + + auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); + if (ScevPredsIt == SCEVToPreds.end()) + return false; + auto &SCEVPreds = ScevPredsIt->second; + + return any_of(SCEVPreds, + [N](const SCEVPredicate *I) { return I->implies(N); }); +} + +const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } + +void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { + for (auto Pred : Preds) + Pred->print(OS, Depth); +} + +void SCEVUnionPredicate::add(const SCEVPredicate *N) { + if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) { + for (auto Pred : Set->Preds) + add(Pred); + return; + } + + if (implies(N)) + return; + + const SCEV *Key = N->getExpr(); + assert(Key && "Only SCEVUnionPredicate doesn't have an " + "associated expression!"); + + SCEVToPreds[Key].push_back(N); + Preds.push_back(N); +} + +PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE) + : SE(SE), Generation(0) {} + +const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { + const SCEV *Expr = SE.getSCEV(V); + RewriteEntry &Entry = RewriteMap[Expr]; + + // If we already have an entry and the version matches, return it. + if (Entry.second && Generation == Entry.first) + return Entry.second; + + // We found an entry but it's stale. Rewrite the stale entry + // according to the current predicate. + if (Entry.second) + Expr = Entry.second; + + const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds); + Entry = {Generation, NewSCEV}; + + return NewSCEV; +} + +void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { + if (Preds.implies(&Pred)) + return; + Preds.add(&Pred); + updateGeneration(); +} + +const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { + return Preds; +} + +void PredicatedScalarEvolution::updateGeneration() { + // If the generation number wrapped, recompute everything.
+ if (++Generation == 0) { + for (auto &II : RewriteMap) { + const SCEV *Rewritten = II.second.second; + II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)}; + } + } +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 6bc0d85..2e50c80 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,125 +19,42 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; -namespace { - /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis - /// implementation that uses ScalarEvolution to answer queries. - class ScalarEvolutionAliasAnalysis : public FunctionPass, - public AliasAnalysis { - ScalarEvolution *SE; - - public: - static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) { - initializeScalarEvolutionAliasAnalysisPass( - *PassRegistry::getPassRegistry()); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - - Value *GetBaseValue(const SCEV *S); - }; -} // End of anonymous namespace - -// Register this pass... -char ScalarEvolutionAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", - "ScalarEvolution-based Alias Analysis", false, true, false) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", - "ScalarEvolution-based Alias Analysis", false, true, false) - -FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { - return new ScalarEvolutionAliasAnalysis(); -} - -void -ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive<ScalarEvolution>(); - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -bool -ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) { - InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); - SE = &getAnalysis<ScalarEvolution>(); - return false; -} - -/// GetBaseValue - Given an expression, try to find a -/// base value. Return null is none was found. -Value * -ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // In an addrec, assume that the base will be in the start, rather - // than the step. - return GetBaseValue(AR->getStart()); - } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { - // If there's a pointer operand, it'll be sorted at the end of the list. 
- const SCEV *Last = A->getOperand(A->getNumOperands()-1); - if (Last->getType()->isPointerTy()) - return GetBaseValue(Last); - } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { - // This is a leaf node. - return U->getValue(); - } - // No Identified object found. - return nullptr; -} - -AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. This allows the code below to ignore this special // case. if (LocA.Size == 0 || LocB.Size == 0) return NoAlias; - // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! - const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr)); - const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr)); + // This is SCEVAAResult. Get the SCEVs! + const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr)); + const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr)); // If they evaluate to the same expression, it's a MustAlias. - if (AS == BS) return MustAlias; + if (AS == BS) + return MustAlias; // If something is known about the difference between the two addresses, // see if it's enough to prove a NoAlias. - if (SE->getEffectiveSCEVType(AS->getType()) == - SE->getEffectiveSCEVType(BS->getType())) { - unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); + if (SE.getEffectiveSCEVType(AS->getType()) == + SE.getEffectiveSCEVType(BS->getType())) { + unsigned BitWidth = SE.getTypeSizeInBits(AS->getType()); APInt ASizeInt(BitWidth, LocA.Size); APInt BSizeInt(BitWidth, LocB.Size); // Compute the difference between the two pointers. - const SCEV *BA = SE->getMinusSCEV(BS, AS); + const SCEV *BA = SE.getMinusSCEV(BS, AS); // Test whether the difference is known to be great enough that memory of // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt // are non-zero, which is special-cased above. - if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) && - (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax())) + if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) && + (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax())) return NoAlias; // Folding the subtraction while preserving range information can be tricky @@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA, // and try again to see if things fold better that way. // Compute the difference between the two pointers. - const SCEV *AB = SE->getMinusSCEV(AS, BS); + const SCEV *AB = SE.getMinusSCEV(AS, BS); // Test whether the difference is known to be great enough that memory of // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt // are non-zero, which is special-cased above. - if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) && - (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax())) + if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) && + (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax())) return NoAlias; } @@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA, return NoAlias; // Forward the query to the next analysis. - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); +} + +/// Given an expression, try to find a base value. +/// +/// Returns null if none was found. 
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // In an addrec, assume that the base will be in the start, rather + // than the step. + return GetBaseValue(AR->getStart()); + } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // If there's a pointer operand, it'll be sorted at the end of the list. + const SCEV *Last = A->getOperand(A->getNumOperands() - 1); + if (Last->getType()->isPointerTy()) + return GetBaseValue(Last); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // This is a leaf node. + return U->getValue(); + } + // No Identified object found. + return nullptr; +} + +SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) { + return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F), + AM->getResult<ScalarEvolutionAnalysis>(F)); +} + +char SCEVAA::PassID; + +char SCEVAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true) + +FunctionPass *llvm::createSCEVAAWrapperPass() { + return new SCEVAAWrapperPass(); +} + +SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) { + initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool SCEVAAWrapperPass::runOnFunction(Function &F) { + Result.reset( + new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + getAnalysis<ScalarEvolutionWrapperPass>().getSE())); + return false; +} + +void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index fee2a2d..921403d 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. - Ret = CastInst::Create(Op, V, Ty, "", IP); + Ret = CastInst::Create(Op, V, Ty, "", &*IP); Ret->takeName(CI); CI->replaceAllUsesWith(Ret); CI->setOperand(0, UndefValue::get(V->getType())); @@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // Create a new cast. if (!Ret) - Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); + Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast // (an invoke for example) and not dominate BIP (but the cast does). 
- assert(SE.DT->dominates(Ret, BIP)); + assert(SE.DT.dominates(Ret, &*BIP)); rememberInstruction(Ret); return Ret; } +static BasicBlock::iterator findInsertPointAfter(Instruction *I, + BasicBlock *MustDominate) { + BasicBlock::iterator IP = ++I->getIterator(); + if (auto *II = dyn_cast<InvokeInst>(I)) + IP = II->getNormalDest()->begin(); + + while (isa<PHINode>(IP)) + ++IP; + + while (IP->isEHPad()) { + if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { + ++IP; + } else if (isa<CatchSwitchInst>(IP)) { + IP = MustDominate->getFirstInsertionPt(); + } else { + llvm_unreachable("unexpected eh pad!"); + } + } + + return IP; +} + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. @@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { while ((isa<BitCastInst>(IP) && isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && cast<BitCastInst>(IP)->getOperand(0) != A) || - isa<DbgInfoIntrinsic>(IP) || - isa<LandingPadInst>(IP)) + isa<DbgInfoIntrinsic>(IP)) ++IP; return ReuseOrCreateCast(A, Ty, Op, IP); } // Cast the instruction immediately after the instruction. Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = I; ++IP; - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP) || isa<LandingPadInst>(IP)) - ++IP; + BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) - return IP; + return &*IP; if (IP == BlockBegin) break; } } @@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. - Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // If we haven't found this binop, insert it. @@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, // Check for divisibility. if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { ConstantInt *CI = - ConstantInt::get(SE.getContext(), - C->getValue()->getValue().sdiv( - FC->getValue()->getValue())); + ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt())); // If the quotient is zero and the remainder is non-zero, reject // the value at this scale. It will be considered for subsequent // smaller scales. if (!CI->isZero()) { const SCEV *Div = SE.getConstant(CI); S = Div; - Remainder = - SE.getAddExpr(Remainder, - SE.getConstant(C->getValue()->getValue().srem( - FC->getValue()->getValue()))); + Remainder = SE.getAddExpr( + Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt()))); return true; } } @@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, // of the given factor. If so, we can factor it. 
const SCEVConstant *FC = cast<SCEVConstant>(Factor); if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) - if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + if (!C->getAPInt().srem(FC->getAPInt())) { SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[0] = SE.getConstant( - C->getValue()->getValue().sdiv(FC->getValue()->getValue())); + NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt())); S = SE.getMulExpr(NewMulOps); return true; } @@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - const SCEV *Op = Ops[i]; + for (const SCEV *Op : Ops) { const SCEV *Remainder = SE.getConstant(Ty, 0); if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { // Op now has ElSize factored out. @@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } else { // The operand was not divisible, so add it to the list of operands // we'll scan next iteration. - NewOps.push_back(Ops[i]); + NewOps.push_back(Op); } } // If we made any changes, update Ops. @@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); assert(!isa<Instruction>(V) || - SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint())); + SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); @@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) - return IP; + return &*IP; if (IP == BlockBegin) break; } } @@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. - Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // Emit a GEP. @@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, BuilderType::InsertPoint SaveInsertPt = Builder.saveIP(); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V)) break; - bool AnyIndexNotLoopInvariant = false; - for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), - E = GepIndices.end(); I != E; ++I) - if (!L->isLoopInvariant(*I)) { - AnyIndexNotLoopInvariant = true; - break; - } + bool AnyIndexNotLoopInvariant = + std::any_of(GepIndices.begin(), GepIndices.end(), + [L](Value *Op) { return !L->isLoopInvariant(Op); }); + if (AnyIndexNotLoopInvariant) break; @@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (!Preheader) break; // Ok, move up a level. 
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, @@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, - GepIndices, - "scevgep"); + Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); @@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, /// expression, according to PickMostRelevantLoop. const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. - std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = - RelevantLoops.insert(std::make_pair(S, nullptr)); + auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; @@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return nullptr; if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) - return Pair.first->second = SE.LI->getLoopFor(I->getParent()); + return Pair.first->second = SE.LI.getLoopFor(I->getParent()); // A non-instruction has no relevant loops. return nullptr; } @@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) L = AR->getLoop(); - for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); - I != E; ++I) - L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); + for (const SCEV *Op : N->operands()) + L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); return RelevantLoops[N] = L; } if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { @@ -621,10 +625,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return RelevantLoops[C] = Result; } if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const Loop *Result = - PickMostRelevantLoop(getRelevantLoop(D->getLHS()), - getRelevantLoop(D->getRHS()), - *SE.DT); + const Loop *Result = PickMostRelevantLoop( + getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT); return RelevantLoops[D] = Result; } llvm_unreachable("Unexpected SCEV type!"); @@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Sort by loop. Use a stable sort so that constants follow non-constants and // pointer operands precede non-pointer operands. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. Value *Sum = nullptr; - for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { const Loop *CurLoop = I->first; const SCEV *Op = I->second; if (!Sum) { @@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants. 
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; - for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) { - const SCEV *Op = I->second; + for (const auto &I : OpsAndLoops) { + const SCEV *Op = I.second; if (!Prod) { // This is the first operand. Just expand it. Prod = expand(Op); @@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { - const APInt &RHS = SC->getValue()->getValue(); + const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, ConstantInt::get(Ty, RHS.logBase2())); @@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, for (User::op_iterator OI = IncV->op_begin()+1, OE = IncV->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast<Instruction>(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) + if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } // Advance to the next instruction. @@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); - if (!OInst || SE.DT->dominates(OInst, InsertPos)) + if (!OInst || SE.DT.dominates(OInst, InsertPos)) return dyn_cast<Instruction>(IncV->getOperand(0)); return nullptr; } case Instruction::BitCast: return dyn_cast<Instruction>(IncV->getOperand(0)); case Instruction::GetElementPtr: - for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); - I != E; ++I) { + for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { if (isa<Constant>(*I)) continue; if (Instruction *OInst = dyn_cast<Instruction>(*I)) { - if (!SE.DT->dominates(OInst, InsertPos)) + if (!SE.DT.dominates(OInst, InsertPos)) return nullptr; } if (allowScale) { @@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, /// it available to other uses in this loop. Recursively hoist any operands, /// until we reach a value that dominates InsertPos. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) return true; // InsertPos must itself dominate IncV so that IncV's new position satisfies // its existing users. - if (isa<PHINode>(InsertPos) - || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + if (isa<PHINode>(InsertPos) || + !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) return false; // Check that the chain of IV operands leading back to Phi can be hoisted. @@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { // IncV is safe to hoist. 
IVIncs.push_back(IncV); IncV = Oper; - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) break; } - for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(), - E = IVIncs.rend(); I != E; ++I) { + for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { (*I)->moveBefore(InsertPos); } return true; @@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, } /// \brief Check whether we can cheaply express the requested SCEV in terms of -/// the available PHI SCEV by truncation and/or invertion of the step. +/// the available PHI SCEV by truncation and/or inversion of the step. static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, @@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Only try partially matching scevs that need truncation and/or // step-inversion if we know this loop is outside the current loop. - bool TryNonMatchingSCEV = IVIncInsertLoop && - SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + bool TryNonMatchingSCEV = + IVIncInsertLoop && + SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (!SE.isSCEVable(PN->getType())) + for (auto &I : *L->getHeader()) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN || !SE.isSCEVable(PN->getType())) continue; const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN)); @@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Potentially, move the increment. We have made sure in // isExpandedAddRecExprPHI or hoistIVInc that this is possible. if (L == IVIncInsertLoop) - hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); + hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. @@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, PostIncLoops.clear(); // Expand code for the start value. - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getHeader()->begin()); + Value *StartV = + expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front()); // StartV must be hoisted into L's preheader to dominate the new phi. assert(!isa<Instruction>(StartV) || - SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), - L->getHeader())); + SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(), + L->getHeader())); // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. @@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. 
It does not apply if we emit a @@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = - cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr, - nullptr, Loops, SE, *SE.DT)); + Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse( + Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - if (isa<Instruction>(Result) - && !SE.DT->dominates(cast<Instruction>(Result), - Builder.GetInsertPoint())) { + if (isa<Instruction>(Result) && + !SE.DT.dominates(cast<Instruction>(Result), + &*Builder.GetInsertPoint())) { // The induction variable's postinc expansion does not dominate this use. // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch @@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. BuilderType::InsertPointGuard Guard(Builder); - StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - std::next(BasicBlock::iterator(cast<Instruction>(V))); - BuilderType::InsertPointGuard Guard(Builder); - while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || - isa<LandingPadInst>(NewInsertPt)) - ++NewInsertPt; + findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock()); V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - NewInsertPt); + &*NewInsertPt); return V; } @@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *Header = L->getHeader(); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - Header->begin()); + &Header->front()); rememberInstruction(CanonicalIV); SmallSet<BasicBlock *, 4> PredSeen; @@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { - Builder.SetInsertPoint(IP->getParent(), IP); + assert(IP); + Builder.SetInsertPoint(IP); return expandCodeFor(SH, Ty); } @@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. 
- Instruction *InsertPt = Builder.GetInsertPoint(); - for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + Instruction *InsertPt = &*Builder.GetInsertPoint(); + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; L = L->getParentLoop()) if (SE.isLoopInvariant(S, L)) { if (!L) break; @@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. - InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); while (InsertPt != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))) { - InsertPt = std::next(BasicBlock::iterator(InsertPt)); + InsertPt = &*std::next(InsertPt->getIterator()); } break; } // Check to see if we already expanded this here. - std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator - I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; BuilderType::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. Value *V = visit(S); @@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr, - L->getHeader()->begin())); + PHINode *V = + cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front())); return V; } @@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector<PHINode*, 8> Phis; - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *Phi = dyn_cast<PHINode>(I); ++I) { - Phis.push_back(Phi); + for (auto &I : *L->getHeader()) { + if (auto *PN = dyn_cast<PHINode>(&I)) + Phis.push_back(PN); + else + break; } + if (TTI) std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { // Put pointers at the back and make sure pointer < pointer = false. @@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, DenseMap<const SCEV *, PHINode *> ExprToIVMap; // Process phis from wide to narrow. Map wide phis to their truncation // so narrow phis can reuse them. - for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(), - PEnd = Phis.end(); PIter != PEnd; ++PIter) { - PHINode *Phi = *PIter; + for (PHINode *Phi : Phis) { + auto SimplifyPHINode = [&](PHINode *PN) -> Value * { + if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + return V; + if (!SE.isSCEVable(PN->getType())) + return nullptr; + auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); + if (!Const) + return nullptr; + return Const->getValue(); + }; // Fold constant phis. 
They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. - if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { + if (Value *V = SimplifyPHINode(Phi)) { + if (V->getType() != Phi->getType()) + continue; Phi->replaceAllUsesWith(V); DeadInsts.emplace_back(Phi); ++NumElim; @@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, if (OrigInc->getType() != IsomorphicInc->getType()) { Instruction *IP = nullptr; if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) - IP = PN->getParent()->getFirstInsertionPt(); + IP = &*PN->getParent()->getFirstInsertionPt(); else IP = OrigInc->getNextNode(); @@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { - IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } @@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } +Value *SCEVExpander::findExistingExpansion(const SCEV *S, + const Instruction *At, Loop *L) { + using namespace llvm::PatternMatch; + + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Look for a suitable value in simple conditions at the loop exits. + for (BasicBlock *BB : ExitingBlocks) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + BasicBlock *TrueBB, *FalseBB; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + TrueBB, FalseBB))) + continue; + + if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) + return LHS; + + if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) + return RHS; + } + + // There is potential to make this significantly smarter, but this simple + // heuristic already gets some interesting cases. + + // Could not find a suitable value. + return nullptr; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + const SCEV *S, Loop *L, const Instruction *At, + SmallPtrSetImpl<const SCEV *> &Processed) { + + // If we can find an existing value for this SCEV available at the point "At", + // then consider the expression cheap.
+ if (At && findExistingExpansion(S, At, L) != nullptr) + return false; // Zero/One operand expressions switch (S->getSCEVType()) { @@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper( case scConstant: return false; case scTruncate: - return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L, - Processed); + return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), + L, At, Processed); case scZeroExtend: return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), - L, Processed); + L, At, Processed); case scSignExtend: return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), - L, Processed); + L, At, Processed); } if (!Processed.insert(S).second) @@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper( if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { // If the divisor is a power of two and the SCEV type fits in a native - // integer, consider the divison cheap irrespective of whether it occurs in + // integer, consider the division cheap irrespective of whether it occurs in // the user code since it can be lowered into a right shift. if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) - if (SC->getValue()->getValue().isPowerOf2()) { + if (SC->getAPInt().isPowerOf2()) { const DataLayout &DL = L->getHeader()->getParent()->getParent()->getDataLayout(); unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); @@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper( if (!ExitingBB) return true; - BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); - if (!ExitingBI || !ExitingBI->isConditional()) + // At the beginning of this function we already tried to find an existing + // value for plain 'S'. Now try to look up 'S + 1', since it is a common + // pattern involving division. This is just a simple search heuristic. + if (!At) + At = &ExitingBB->back(); + if (!findExistingExpansion( + SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) return true; - - ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition()); - if (!OrigCond) - return true; - - const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1)); - RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1)); - if (RHS != S) { - const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0)); - LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1)); - if (LHS != S) - return true; - } } // HowManyLessThans uses a Max expression whenever the loop is not guarded by @@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper( // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive to rematerialize.
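To make the 'S + 1' probe above concrete, an illustrative loop (not from the patch) where the backedge-taken count S is n/4 - 1 while the value already present in the code is n/4, i.e. S + 1:

    // The exit compare materializes n/4, so findExistingExpansion(S + 1, ...)
    // succeeds and the backedge-taken count is judged cheap to expand.
    void zero_prefix(int *a, unsigned n) {
      for (unsigned i = 0; i != n / 4; ++i)
        a[i] = 0;
    }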
if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - if (isHighCostExpansionHelper(*I, L, Processed)) + for (auto *Op : NAry->operands()) + if (isHighCostExpansionHelper(Op, L, At, Processed)) return true; - } } // If we haven't recognized an expensive SCEV pattern, assume it's an @@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper( return false; } +Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, + Instruction *IP) { + assert(IP); + switch (Pred->getKind()) { + case SCEVPredicate::P_Union: + return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP); + case SCEVPredicate::P_Equal: + return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP); + } + llvm_unreachable("Unknown SCEV predicate type"); +} + +Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, + Instruction *IP) { + Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP); + Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP); + + Builder.SetInsertPoint(IP); + auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); + return I; +} + +Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, + Instruction *IP) { + auto *BoolType = IntegerType::get(IP->getContext(), 1); + Value *Check = ConstantInt::getNullValue(BoolType); + + // Loop over all checks in this set. + for (auto Pred : Union->getPredicates()) { + auto *NextCheck = expandCodeForPredicate(Pred, IP); + Builder.SetInsertPoint(IP); + Check = Builder.CreateOr(Check, NextCheck); + } + + return Check; +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index b238fe4..b7fd5d5 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { SmallVector<const SCEV *, 8> Operands; const Loop *L = AR->getLoop(); // The addrec conceptually uses its operands at loop entry. - Instruction *LUser = L->getHeader()->begin(); + Instruction *LUser = &L->getHeader()->front(); // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index a5fca3e..029997a 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -32,22 +32,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" + using namespace llvm; // A handy option for disabling scoped no-alias functionality. The same effect // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. 
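Stepping back to the SCEV predicate expansion added above: expandUnionPredicate folds one `icmp ne` ("ident.check") per member predicate into a single i1 via `or`, so the result is true exactly when some equality predicate fails at runtime. A hedged sketch of how a client transform might consume it; the helper name, the preheader placement, and the use of SplitBlockAndInsertIfThen are illustrative assumptions, not part of this commit:

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Materialize the union predicate's failure check in the loop preheader and
// branch to a block that the caller would fill with a conservative
// (non-speculative) version of the loop.
static void versionOnPredicate(SCEVExpander &Expander,
                               const SCEVUnionPredicate &Pred,
                               BasicBlock *Preheader) {
  // True when at least one member predicate does not hold at runtime.
  Value *Check =
      Expander.expandCodeForPredicate(&Pred, Preheader->getTerminator());

  // Split the preheader: the new 'then' block is taken on failure.
  SplitBlockAndInsertIfThen(Check, Preheader->getTerminator(),
                            /*Unreachable=*/false);
}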
-static cl::opt<bool> -EnableScopedNoAlias("enable-scoped-noalias", cl::init(true)); +static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias", + cl::init(true)); namespace { /// AliasScopeNode - This is a simple wrapper around an MDNode which provides @@ -57,7 +58,7 @@ class AliasScopeNode { const MDNode *Node; public: - AliasScopeNode() : Node(0) {} + AliasScopeNode() : Node(nullptr) {} explicit AliasScopeNode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this AliasScopeNode. @@ -70,79 +71,74 @@ public: return dyn_cast_or_null<MDNode>(Node->getOperand(1)); } }; +} // end of anonymous namespace -/// ScopedNoAliasAA - This is a simple alias analysis -/// implementation that uses scoped-noalias metadata to answer queries. -class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis { -public: - static char ID; // Class identification, replacement for typeinfo - ScopedNoAliasAA() : ImmutablePass(ID) { - initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); - } +AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + if (!EnableScopedNoAlias) + return AAResultBase::alias(LocA, LocB); - bool doInitialization(Module &M) override; + // Get the attached MDNodes. + const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope; - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } + const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias; -protected: - bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; - void collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl<const MDNode *> &Nodes) const; - -private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; -}; -} // End of anonymous namespace + if (!mayAliasInScopes(AScopes, BNoAlias)) + return NoAlias; -// Register this pass... -char ScopedNoAliasAA::ID = 0; -INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias", - "Scoped NoAlias Alias Analysis", false, true, false) + if (!mayAliasInScopes(BScopes, ANoAlias)) + return NoAlias; -ImmutablePass *llvm::createScopedNoAliasAAPass() { - return new ScopedNoAliasAA(); + // If they may alias, chain to the next AliasAnalysis. 
+ return AAResultBase::alias(LocA, LocB); } -bool ScopedNoAliasAA::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS, Loc); + + if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( + LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + Loc.AATags.NoAlias)) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS, Loc); } -void -ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS1, CS2); + + if (!mayAliasInScopes( + CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS1, CS2); } -void -ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl<const MDNode *> &Nodes) const { +void ScopedNoAliasAAResult::collectMDInDomain( + const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl<const MDNode *> &Nodes) const { for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i) if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i))) if (AliasScopeNode(MD).getDomain() == Domain) Nodes.insert(MD); } -bool -ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, - const MDNode *NoAlias) const { +bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, + const MDNode *NoAlias) const { if (!Scopes || !NoAlias) return true; @@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, return true; } -AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - if (!EnableScopedNoAlias) - return AliasAnalysis::alias(LocA, LocB); - - // Get the attached MDNodes. - const MDNode *AScopes = LocA.AATags.Scope, - *BScopes = LocB.AATags.Scope; +ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, + AnalysisManager<Function> *AM) { + return ScopedNoAliasAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} - const MDNode *ANoAlias = LocA.AATags.NoAlias, - *BNoAlias = LocB.AATags.NoAlias; +char ScopedNoAliasAA::PassID; - if (!mayAliasInScopes(AScopes, BNoAlias)) - return NoAlias; - - if (!mayAliasInScopes(BScopes, ANoAlias)) - return NoAlias; +char ScopedNoAliasAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) - // If they may alias, chain to the next AliasAnalysis. 
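For orientation, the scope and noalias lists consulted by the queries above are ordinary metadata that frontends or the inliner attach to memory instructions. A hedged sketch of constructing them with MDBuilder (names and the single-scope setup are illustrative):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Build one domain containing one scope, then tag a pair of instructions:
// MemInstA declares it operates in the scope, and MemInstB promises not to
// alias anything in that scope, so ScopedNoAliasAAResult::alias can return
// NoAlias for the pair.
static void tagWithScopedNoAlias(Instruction *MemInstA, Instruction *MemInstB,
                                 LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("example.domain");
  MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "example.scope");
  MDNode *ScopeList = MDNode::get(Ctx, Scope);
  MemInstA->setMetadata(LLVMContext::MD_alias_scope, ScopeList);
  MemInstB->setMetadata(LLVMContext::MD_noalias, ScopeList);
}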
- return AliasAnalysis::alias(LocA, LocB); +ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() { + return new ScopedNoAliasAAWrapperPass(); } -bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) { + initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry()); } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) { - return AliasAnalysis::getModRefBehavior(CS); +bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ScopedNoAliasAAResult( + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(const Function *F) { - return AliasAnalysis::getModRefBehavior(F); +bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS, Loc); - - if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( - LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - Loc.AATags.NoAlias)) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS1, CS2); - - if (!mayAliasInScopes( - CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS1, CS2); +void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } - diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp index edd82f5..f5a927b 100644 --- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) { void SparseSolver::Print(Function &F, raw_ostream &OS) const { OS << "\nFUNCTION: " << F.getName() << "\n"; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!BBExecutable.count(BB)) + for (auto &BB : F) { + if (!BBExecutable.count(&BB)) OS << "INFEASIBLE: "; OS << "\t"; - if (BB->hasName()) - OS << BB->getName() << ":\n"; + if (BB.hasName()) + OS << BB.getName() << ":\n"; else OS << "; anon bb\n"; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - LatticeFunc->PrintValue(getLatticeState(I), OS); - OS << *I << "\n"; + for (auto &I : BB) { + LatticeFunc->PrintValue(getLatticeState(&I), OS); + OS << I << "\n"; } OS << "\n"; diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index 635c50c..e00f4ae 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -61,10 +61,19 @@ static void 
initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } #endif // !NDEBUG + if (T.getArch() == Triple::r600 || + T.getArch() == Triple::amdgcn) { + TLI.setUnavailable(LibFunc::ldexp); + TLI.setUnavailable(LibFunc::ldexpf); + TLI.setUnavailable(LibFunc::ldexpl); + } + // There are no library implementations of memcpy and memset for AMD GPUs or // WebAssembly, and these can be difficult to lower in the backend. if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { + T.getArch() == Triple::amdgcn || + T.getArch() == Triple::wasm32 || + T.getArch() == Triple::wasm64) { TLI.setUnavailable(LibFunc::memcpy); TLI.setUnavailable(LibFunc::memset); TLI.setUnavailable(LibFunc::memset_pattern16); @@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. + // All versions of watchOS support it. if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc::memset_pattern16); } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) TLI.setUnavailable(LibFunc::memset_pattern16); - } else { + } else if (!T.isWatchOS()) { TLI.setUnavailable(LibFunc::memset_pattern16); } @@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } break; case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: TLI.setUnavailable(LibFunc::exp10l); - if (T.isOSVersionLT(7, 0)) { + if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || + (T.isOSVersionLT(9, 0) && + (T.getArch() == Triple::x86 || + T.getArch() == Triple::x86_64)))) { TLI.setUnavailable(LibFunc::exp10); TLI.setUnavailable(LibFunc::exp10f); } else { @@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and // Linux (GLIBC): // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html switch (T.getOS()) { case Triple::Darwin: case Triple::MacOSX: case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // ffsll is available on at least FreeBSD and Linux (GLIBC): - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html switch (T.getOS()) { + case Triple::Darwin: + case Triple::MacOSX: + case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc::ffsll); } + // The following functions are available on at least FreeBSD: + // http://svn.freebsd.org/base/head/lib/libc/string/fls.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c + if (!T.isOSFreeBSD()) { + TLI.setUnavailable(LibFunc::fls); + TLI.setUnavailable(LibFunc::flsl); + TLI.setUnavailable(LibFunc::flsll); + } + // The following functions are available on at least Linux: if (!T.isOSLinux()) { TLI.setUnavailable(LibFunc::dunder_strdup); diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 7d1c3fb..9c1d3fd 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { return *this; } -unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const { - return TTIImpl->getOperationCost(Opcode, Ty, OpTy); +int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, - int NumArgs) const { - return TTIImpl->getCallCost(FTy, NumArgs); +int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { + int Cost = TTIImpl->getCallCost(FTy, NumArgs); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getCallCost(const Function *F, - ArrayRef<const Value *> Arguments) const { - return TTIImpl->getCallCost(F, Arguments); +int TargetTransformInfo::getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) const { + int Cost = TTIImpl->getCallCost(F, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<const Value *> Arguments) const { - return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); +int TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { + int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getUserCost(const User *U) const { - return TTIImpl->getUserCost(U); +int TargetTransformInfo::getUserCost(const User *U) const { + int Cost = TTIImpl->getUserCost(U); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { @@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedStore(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { + return TTIImpl->isLegalMaskedStore(DataType); +} + +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { + return TTIImpl->isLegalMaskedLoad(DataType); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { + return TTIImpl->isLegalMaskedGather(DataType); +} + +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { + return TTIImpl->isLegalMaskedScatter(DataType); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { - return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale, AddrSpace); + int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace); +
assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } +bool TargetTransformInfo::enableInterleavedAccessVectorization() const { + return TTIImpl->enableInterleavedAccessVectorization(); +} + TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); @@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } -unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { - return TTIImpl->getFPOpCost(Ty); +int TargetTransformInfo::getFPOpCost(Type *Ty) const { + int Cost = TTIImpl->getFPOpCost(Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } -unsigned TargetTransformInfo::getArithmeticInstrCost( +int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) const { - return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index, Type *SubTp) const { - return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); +int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, + Type *SubTp) const { + int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - return TTIImpl->getCastInstrCost(Opcode, Dst, Src); +int 
TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - return TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + int Cost = TTIImpl->getCFInstrCost(Opcode); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); +int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - return TTIImpl->getVectorInstrCost(Opcode, Val, Index); +int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) const { + int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getInterleavedMemoryOpCost( +int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace) const { - return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys) const { - return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type *> Tys) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); 
+ assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, - ArrayRef<Type *> Tys) const { - return TTIImpl->getCallInstrCost(F, RetTy, Tys); +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, - bool IsComplex) const { - return TTIImpl->getAddressComputationCost(Tp, IsComplex); +int TargetTransformInfo::getAddressComputationCost(Type *Tp, + bool IsComplex) const { + int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const { - return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const { + int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned @@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } -bool TargetTransformInfo::hasCompatibleFunctionAttributes( - const Function *Caller, const Function *Callee) const { - return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee); +bool TargetTransformInfo::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + return TTIImpl->areInlineCompatible(Caller, Callee); } TargetTransformInfo::Concept::~Concept() {} @@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( - std::function<Result(Function &)> TTICallback) + std::function<Result(const Function &)> TTICallback) : TTICallback(TTICallback) {} -TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) { return TTICallback(F); } char TargetIRAnalysis::PassID; -TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } @@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( *PassRegistry::getPassRegistry()); } -TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { TTI = TIRA.run(F); return *TTI; } diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 4e9c6f6..805f3ef 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -121,15 +121,13 @@ // 
//===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be @@ -138,199 +136,138 @@ using namespace llvm; static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); namespace { - /// TBAANode - This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAANode { - const MDNode *Node; - - public: - TBAANode() : Node(nullptr) {} - explicit TBAANode(const MDNode *N) : Node(N) {} - - /// getNode - Get the MDNode for this TBAANode. - const MDNode *getNode() const { return Node; } - - /// getParent - Get this TBAANode's Alias tree parent. - TBAANode getParent() const { - if (Node->getNumOperands() < 2) - return TBAANode(); - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); - if (!P) - return TBAANode(); - // Ok, this node has a valid parent. Return it. - return TBAANode(P); - } - - /// TypeIsImmutable - Test if this TBAANode represents a type for objects - /// which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { - if (Node->getNumOperands() < 3) - return false; - ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; - - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTagNode { - /// This node should be created with createTBAAStructTagNode. - const MDNode *Node; +/// TBAANode - This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAANode { + const MDNode *Node; + +public: + TBAANode() : Node(nullptr) {} + explicit TBAANode(const MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } + + /// getParent - Get this TBAANode's Alias tree parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } - public: - explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; - /// Get the MDNode for this TBAAStructTagNode. 
- const MDNode *getNode() const { return Node; } +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; - const MDNode *getBaseType() const { - return dyn_cast_or_null<MDNode>(Node->getOperand(0)); - } - const MDNode *getAccessType() const { - return dyn_cast_or_null<MDNode>(Node->getOperand(1)); - } - uint64_t getOffset() const { - return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); - } - /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for - /// objects which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { - if (Node->getNumOperands() < 4) - return false; - ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; - - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTypeNode { - /// This node should be created with createTBAAStructTypeNode. - const MDNode *Node; - - public: - TBAAStructTypeNode() : Node(nullptr) {} - explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} - - /// Get the MDNode for this TBAAStructTypeNode. - const MDNode *getNode() const { return Node; } - - /// Get this TBAAStructTypeNode's field in the type DAG with - /// given offset. Update the offset to be relative to the field type. - TBAAStructTypeNode getParent(uint64_t &Offset) const { - // Parent can be omitted for the root node. - if (Node->getNumOperands() < 2) - return TBAAStructTypeNode(); +public: + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} - // Fast path for a scalar type node and a struct type node with a single - // field. - if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 - ? 0 - : mdconst::extract<ConstantInt>(Node->getOperand(2)) - ->getZExtValue(); - Offset -= Cur; - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); - if (!P) - return TBAAStructTypeNode(); - return TBAAStructTypeNode(P); - } + /// Get the MDNode for this TBAAStructTagNode. + const MDNode *getNode() const { return Node; } - // Assume the offsets are in order. We return the previous field if - // the current offset is bigger than the given offset. - unsigned TheIdx = 0; - for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { - uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) - ->getZExtValue(); - if (Cur > Offset) { - assert(Idx >= 3 && - "TBAAStructTypeNode::getParent should have an offset match!"); - TheIdx = Idx - 2; - break; - } - } - // Move along the last field. 
- if (TheIdx == 0) - TheIdx = Node->getNumOperands() - 2; - uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) - ->getZExtValue(); + const MDNode *getBaseType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } + uint64_t getOffset() const { + return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; + +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. + const MDNode *Node; + +public: + TBAAStructTypeNode() : Node(nullptr) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } + + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); + + // Fast path for a scalar type node and a struct type node with a single + // field. + if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 + ? 0 + : mdconst::extract<ConstantInt>(Node->getOperand(2)) + ->getZExtValue(); Offset -= Cur; - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); if (!P) return TBAAStructTypeNode(); return TBAAStructTypeNode(P); } - }; -} - -namespace { - /// TypeBasedAliasAnalysis - This is a simple alias analysis - /// implementation that uses TypeBased to answer queries. - class TypeBasedAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - TypeBasedAliasAnalysis() : ImmutablePass(ID) { - initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - bool doInitialization(Module &M) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. 
+ unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) + ->getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } } - - bool Aliases(const MDNode *A, const MDNode *B) const; - bool PathAliases(const MDNode *A, const MDNode *B) const; - - private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; - }; -} // End of anonymous namespace - -// Register this pass... -char TypeBasedAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", - "Type-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { - return new TypeBasedAliasAnalysis(); -} - -bool TypeBasedAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; -} - -void -TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); + // Move along the last field. + if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) + ->getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } +}; } /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat @@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) { return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool -TypeBasedAliasAnalysis::Aliases(const MDNode *A, - const MDNode *B) const { - // Make sure that both MDNodes are struct-path aware. - if (isStructPathTBAA(A) && isStructPathTBAA(B)) - return PathAliases(A, B); - - // Keep track of the root node for A and B. - TBAANode RootA, RootB; - - // Climb the tree from A to see if we reach B. - for (TBAANode T(A); ; ) { - if (T.getNode() == B) - // B is an ancestor of A. - return true; - - RootA = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Climb the tree from B to see if we reach A. - for (TBAANode T(B); ; ) { - if (T.getNode() == A) - // A is an ancestor of B. - return true; - - RootB = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -/// Test whether the struct-path tag represented by A may alias the -/// struct-path tag represented by B. 
-bool -TypeBasedAliasAnalysis::PathAliases(const MDNode *A, - const MDNode *B) const { - // Verify that both input nodes are struct-path aware. - assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); - assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); - - // Keep track of the root node for A and B. - TBAAStructTypeNode RootA, RootB; - TBAAStructTagNode TagA(A), TagB(B); - - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. - - // Start from the base type of A, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the base type of B or - // until we reach the Root node. - // Compare the adjusted offset once we have the same base. - - // Climb the type DAG from base type of A to see if we reach base type of B. - const MDNode *BaseA = TagA.getBaseType(); - const MDNode *BaseB = TagB.getBaseType(); - uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); - for (TBAAStructTypeNode T(BaseA); ; ) { - if (T.getNode() == BaseB) - // Base type of A encloses base type of B, check if the offsets match. - return OffsetA == OffsetB; - - RootA = T; - // Follow the edge with the correct offset, OffsetA will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetA); - if (!T.getNode()) - break; - } - - // Reset OffsetA and climb the type DAG from base type of B to see if we reach - // base type of A. - OffsetA = TagA.getOffset(); - for (TBAAStructTypeNode T(BaseB); ; ) { - if (T.getNode() == BaseA) - // Base type of B encloses base type of A, check if the offsets match. - return OffsetA == OffsetB; - - RootB = T; - // Follow the edge with the correct offset, OffsetB will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetB); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { if (!EnableTBAA) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must // be conservative. const MDNode *AM = LocA.AATags.TBAA; - if (!AM) return AliasAnalysis::alias(LocA, LocB); + if (!AM) + return AAResultBase::alias(LocA, LocB); const MDNode *BM = LocB.AATags.TBAA; - if (!BM) return AliasAnalysis::alias(LocA, LocB); + if (!BM) + return AAResultBase::alias(LocA, LocB); // If they may alias, chain to the next AliasAnalysis. if (Aliases(AM, BM)) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Otherwise return a definitive result. 
return NoAlias; } -bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { if (!EnableTBAA) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); const MDNode *M = Loc.AATags.TBAA; - if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + if (!M) + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. @@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) return true; - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +FunctionModRefBehavior +TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { if (!EnableTBAA) - return AliasAnalysis::getModRefBehavior(CS); + return AAResultBase::getModRefBehavior(CS); - ModRefBehavior Min = UnknownModRefBehavior; + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) - Min = OnlyReadsMemory; + Min = FMRB_OnlyReadsMemory; - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { // Functions don't have metadata. Just chain to the next implementation. 
- return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); if (const MDNode *L = Loc.AATags.TBAA) if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); if (const MDNode *M1 = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (const MDNode *M2 = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); } bool MDNode::isTBAAVtableAccess() const { if (!isStructPathTBAA(this)) { - if (getNumOperands() < 1) return false; + if (getNumOperands() < 1) + return false; if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } return false; } // For struct-path aware TBAA, we use the access type of the tag. - if (getNumOperands() < 2) return false; + if (getNumOperands() < 2) + return false; MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); - if (!Tag) return false; + if (!Tag) + return false; if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } - return false; + return false; } MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { @@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null<MDNode>(A->getOperand(1)); - if (!A) return nullptr; + if (!A) + return nullptr; B = cast_or_null<MDNode>(B->getOperand(1)); - if (!B) return nullptr; + if (!B) + return nullptr; } SmallSetVector<MDNode *, 4> PathA; @@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { int IB = PathB.size() - 1; MDNode *Ret = nullptr; - while (IA >= 0 && IB >=0) { + while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; else @@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) + return PathAliases(A, B); + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A);;) { + if (T.getNode() == B) + // B is an ancestor of A. 
+ return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B);;) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + + // Keep track of the root node for A and B. + TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA);;) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB);;) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. 
+ return false; +} + +TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) { + return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} + +char TypeBasedAA::PassID; + +char TypeBasedAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa", + "Type-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis", + false, true) + +ImmutablePass *llvm::createTypeBasedAAWrapperPass() { + return new TypeBasedAAWrapperPass(); +} + +TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) { + initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool TypeBasedAAWrapperPass::doInitialization(Module &M) { + Result.reset(new TypeBasedAAResult( + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; +} + +bool TypeBasedAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index fa0d779..314ec9c 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -43,7 +44,7 @@ const unsigned MaxDepth = 6; /// Enable an experimental feature to leverage information about dominating /// conditions to compute known bits. The individual options below control how -/// hard we search. The defaults are choosen to be fairly aggressive. If you +/// hard we search. The defaults are chosen to be fairly aggressive. If you /// run into compile time problems when testing, scale them back and report /// your findings. static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", @@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth", /// conditions? static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", cl::Hidden, - cl::init(20000)); + cl::init(20)); // Controls the number of uses of the value searched for possible // dominating comparisons. static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", - cl::Hidden, cl::init(2000)); + cl::Hidden, cl::init(20)); // If true, don't consider only compares whose only use is a branch. 
static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", @@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + bool NonNegative, Negative; + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); + return NonNegative; +} + +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q); + +bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + return ::isKnownNonEqual(V1, V2, DL, Query(AC, + safeCxtI(V1, safeCxtI(V2, CxtI)), + DT)); +} + static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, const Query &Q); @@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, } // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. + // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clearAllBits(); @@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero) { + APInt &KnownZero, + APInt &KnownOne) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); - // Use the high end of the ranges to find leading zeros. - unsigned MinLeadingZeros = BitWidth; + KnownZero.setAllBits(); + KnownOne.setAllBits(); + for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); ConstantInt *Upper = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); ConstantRange Range(Lower->getValue(), Upper->getValue()); - if (Range.isWrappedSet()) - MinLeadingZeros = 0; // -1 has no zeros - unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); - MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); - } - KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); + // The first CommonPrefixBits of all values in Range are equal. + unsigned CommonPrefixBits = + (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); + + APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); + KnownOne &= Range.getUnsignedMax() & Mask; + KnownZero &= ~Range.getUnsignedMax() & Mask; + } } static bool isEphemeralValueOf(Instruction *I, const Value *E) { @@ -374,20 +398,20 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) { SmallPtrSet<const Value *, 32> Visited; SmallPtrSet<const Value *, 16> EphValues; + // The instruction defining an assumption's condition itself is always + // considered ephemeral to that assumption (even if it has other + // non-ephemeral users). See r246696's test case for an example. + if (std::find(I->op_begin(), I->op_end(), E) != I->op_end()) + return true; + while (!WorkSet.empty()) { const Value *V = WorkSet.pop_back_val(); if (!Visited.insert(V).second) continue; // If all uses of this value are ephemeral, then so is this value. 
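The computeKnownBitsFromRangeMetadata rewrite in the hunk above replaces the old leading-zeros estimate with a strictly stronger fact: every value in a range agrees on the leading bits where the range's unsigned min and max agree, and those bits become known outright, ones included; the per-range masks are then intersected across all ranges in the metadata. A standalone sketch for a single non-wrapped range, with plain 64-bit masks standing in for APInt (knownBitsFromRange is a hypothetical name; __builtin_clzll is a GCC/Clang builtin):

#include <cstdint>
#include <utility>

// Known (zero, one) masks implied by one half-open range [Lo, Hi), assuming
// it does not wrap, so Lo is the unsigned min and Hi - 1 the unsigned max.
// Example: [8, 12) covers 0b1000..0b1011, fixing all bits but the low two.
std::pair<uint64_t, uint64_t> knownBitsFromRange(uint64_t Lo, uint64_t Hi) {
  uint64_t Max = Hi - 1;
  uint64_t Diff = Lo ^ Max;                 // bits that vary within the range
  unsigned Lead = Diff ? __builtin_clzll(Diff) : 64;
  uint64_t Mask = Lead == 0 ? 0 : ~0ULL << (64 - Lead);
  return {~Max & Mask, Max & Mask};         // (known-zero, known-one)
}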
- bool FoundNEUse = false; - for (const User *I : V->users()) - if (!EphValues.count(I)) { - FoundNEUse = true; - break; - } - - if (!FoundNEUse) { + if (std::all_of(V->user_begin(), V->user_end(), + [&](const User *U) { return EphValues.count(U); })) { if (V == E) return true; @@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { // of the block); the common case is that the assume will come first. for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) - if (I == Q.CxtI) + if (&*I == Q.CxtI) return true; // The context must come first... for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, if (!Q.DT || !Q.CxtI) return; Instruction *Cxt = const_cast<Instruction *>(Q.CxtI); + // The context instruction might be in a statically unreachable block. If + // so, asking dominator queries may yield suprising results. (e.g. the block + // may not have a dom tree node) + if (!Q.DT->isReachableFromEntry(Cxt->getParent())) + return; // Avoid useless work if (auto VI = dyn_cast<Instruction>(V)) @@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, // instruction. Finding a condition where one path dominates the context // isn't enough because both the true and false cases could merge before // the context instruction we're actually interested in. Instead, we need - // to ensure that the taken *edge* dominates the context instruction. + // to ensure that the taken *edge* dominates the context instruction. We + // know that the edge must be reachable since we started from a reachable + // block. BasicBlock *BB0 = BI->getSuccessor(0); BasicBlockEdge Edge(BI->getParent(), BB0); if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) @@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } } +// Compute known bits from a shift operator, including those with a +// non-constant shift amount. KnownZero and KnownOne are the outputs of this +// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the +// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific +// functors that, given the known-zero or known-one bits respectively, and a +// shift amount, compute the implied known-zero or known-one bits of the shift +// operator's result respectively for that shift amount. The results from calling +// KZF and KOF are conservatively combined for all permitted shift amounts. 
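The function that follows makes that combination concrete by brute force: every shift amount that contradicts the known bits of the shift operand is skipped, and the known bits implied by each surviving amount are intersected. A compressed standalone sketch for an 8-bit shl, with plain masks in place of APInt and omitting the constant-shift fast path and the non-zero-shifter refinement (shlKnownBits is a hypothetical name):

#include <cstdint>

// In/Amt masks are the known-zero/known-one bits of the shifted operand and
// of the shift amount; Out masks receive the result's known bits.
void shlKnownBits(uint8_t InKZ, uint8_t InKO, uint8_t AmtKZ, uint8_t AmtKO,
                  uint8_t &OutKZ, uint8_t &OutKO) {
  OutKZ = OutKO = 0xFF;                  // start from "all bits known"
  for (unsigned S = 0; S < 8; ++S) {
    if (S & AmtKZ)
      continue;                          // S sets a bit known to be zero
    if (~S & AmtKO)
      continue;                          // S lacks a bit known to be one
    OutKZ &= uint8_t((InKZ << S) | ((1u << S) - 1)); // low S bits become 0
    OutKO &= uint8_t(InKO << S);
  }
  if (OutKZ & OutKO)                     // no feasible in-range amount:
    OutKZ = OutKO = 0;                   // keep the known sets disjoint
}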
+template <typename KZFunctor, typename KOFunctor> +static void computeKnownBitsFromShiftOperator(Operator *I, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout &DL, unsigned Depth, const Query &Q, + KZFunctor KZF, KOFunctor KOF) { + unsigned BitWidth = KnownZero.getBitWidth(); + + if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); + + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + KnownZero = KZF(KnownZero, ShiftAmt); + KnownOne = KOF(KnownOne, ShiftAmt); + return; + } + + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + + // Note: We cannot use KnownZero.getLimitedValue() here, because if + // BitWidth > 64 and any upper bits are known, we'll end up returning the + // limit value (which implies all bits are known). + uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + + // It would be more-clearly correct to use the two temporaries for this + // calculation. Reusing the APInts here to prevent unnecessary allocations. + KnownZero.clearAllBits(), KnownOne.clearAllBits(); + + // If we know the shifter operand is nonzero, we can sometimes infer more + // known bits. However this is expensive to compute, so be lazy about it and + // only compute it when absolutely necessary. + Optional<bool> ShifterOperandIsNonZero; + + // Early exit if we can't constrain any well-defined shift amount. + if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { + ShifterOperandIsNonZero = + isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + if (!*ShifterOperandIsNonZero) + return; + } + + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { + // Combine the shifted known input bits only for those shift amounts + // compatible with its known constraints. + if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt) + continue; + if ((ShiftAmt | ShiftAmtKO) != ShiftAmt) + continue; + // If we know the shifter is nonzero, we may be able to infer more known + // bits. This check is sunk down as far as possible to avoid the expensive + // call to isKnownNonZero if the cheaper checks above fail. + if (ShiftAmt == 0) { + if (!ShifterOperandIsNonZero.hasValue()) + ShifterOperandIsNonZero = + isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + if (*ShifterOperandIsNonZero) + continue; + } + + KnownZero &= KZF(KnownZero2, ShiftAmt); + KnownOne &= KOF(KnownOne2, ShiftAmt); + } + + // If there are no compatible shift amounts, then we've proven that the shift + // amount must be >= the BitWidth, and the result is undefined. We could + // return anything we'd like, but we need to make sure the sets of known bits + // stay disjoint (it should be better for some other code to actually + // propagate the undef than to pick a value here using known bits). 
+ if ((KnownZero & KnownOne) != 0) + KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} + static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { @@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, default: break; case Instruction::Load: if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // here we handle the more general case of adding any odd number by + // matching the form add(x, add(x, y)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + Value *Y = nullptr; + if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y)))) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } break; } case Instruction::Or: { @@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || + SrcTy->isFloatingPointTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } - case Instruction::Shl: + case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne << ShiftAmt; + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::LShr: + } + case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // high bits known zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::lshr(KnownZero, ShiftAmt) | + // High bits known zero. + APInt::getHighBitsSet(BitWidth, ShiftAmt); + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::lshr(KnownOne, ShiftAmt); + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::AShr: + } + case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::ashr(KnownZero, ShiftAmt); + }; - // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::ashr(KnownOne, ShiftAmt); + }; - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. - KnownZero |= HighBits; - else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. - KnownOne |= HighBits; - } + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; + } case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, @@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, case Instruction::Call: case Instruction::Invoke: if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); // If a range metadata is attached to this IntrinsicInst, intersect the // explicit range specified by the metadata and the implicit range of // the intrinsic. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. 
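Spelled out: if KnownZero2 rules out all but B bit positions, the count is at most B, so the result fits in however many bits are needed to represent B, and everything above them is known zero. The APInt computation that follows does exactly this; a 64-bit standalone restatement (ctpopKnownZero is a hypothetical name; the __builtin_* calls are GCC/Clang builtins):

#include <cstdint>

// Known-zero mask for ctpop(X), given the known-zero mask of X.
uint64_t ctpopKnownZero(uint64_t KnownZero2) {
  unsigned BitsPossiblySet = 64 - __builtin_popcountll(KnownZero2);
  unsigned LeadingZeros =
      BitsPossiblySet ? __builtin_clzll(BitsPossiblySet) : 64;
  return ~0ULL << (64 - LeadingZeros);   // high LeadingZeros bits known zero
}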
+ unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. + break; + } + case Intrinsic::fabs: { + Type *Ty = II->getType(); + APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); + KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } } +static unsigned getAlignment(const Value *V, const DataLayout &DL) { + unsigned Align = 0; + if (auto *GO = dyn_cast<GlobalObject>(V)) { + Align = GO->getAlignment(); + if (Align == 0) { + if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (GVar->isStrongDefinitionForLinker()) + Align = DL.getPreferredAlignment(GVar); + else + Align = DL.getABITypeAlignment(ObjectType); + } + } + } + } else if (const Argument *A = dyn_cast<Argument>(V)) { + Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + + if (!Align && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast<PointerType>(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = DL.getABITypeAlignment(EltTy); + } + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + Align = AI->getAlignment(); + else if (auto CS = ImmutableCallSite(V)) + Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); + else if (const LoadInst *LI = dyn_cast<LoadInst>(V)) + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + Align = CI->getLimitedValue(); + } + + return Align; +} + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned BitWidth = KnownZero.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->isFPOrFPVectorTy() || V->getType()->getScalarType()->isPointerTy()) && - "Not integer or pointer type!"); + "Not integer, floating point, or pointer type!"); assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && @@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } - // The address of an aligned GlobalValue has trailing zeros. - if (auto *GO = dyn_cast<GlobalObject>(V)) { - unsigned Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. 
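The refactor in this hunk hoists every per-value alignment source — global objects, sret and aligned arguments, allocas, call-site return attributes, and !align load metadata — into the single getAlignment helper above, and computeKnownBits then folds the result in uniformly: a pointer aligned to a power of two has that many trailing zero bits. The refinement in isolation (alignmentKnownZero is a hypothetical name):

#include <cstdint>

// Low known-zero bits implied by a power-of-two alignment; Align == 0
// means "unknown" and yields nothing.
uint64_t alignmentKnownZero(uint64_t Align) {
  if (Align == 0)
    return 0;
  unsigned TZ = __builtin_ctzll(Align);  // log2(Align) for a power of two
  return TZ ? ~(~0ULL << TZ) : 0;        // e.g. Align 16 -> low 4 bits zero
}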
- if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, - countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - return; - } - - if (Argument *A = dyn_cast<Argument>(V)) { - unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. - Type *EltTy = cast<PointerType>(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } - - if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - - // Don't give up yet... there might be an assumption that provides more - // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Or a dominating condition for that matter - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, - Depth, Q); - return; - } - // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Operator *I = dyn_cast<Operator>(V)) computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + + // Aligned pointers have trailing zeros - refine KnownZero set + if (V->getType()->isPointerTy()) { + unsigned Align = getAlignment(V, DL); + if (Align) + KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + } + // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition // strictly refines KnownZero and KnownOne. Therefore, we run them after // computeKnownBitsFromOperator. @@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; + + // If the shifter operand is a constant, and all of the bits shifted + // out are known to be zero, and X is known non-zero then at least one + // non-zero bit must remain. + if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); + // Is there a known one in the portion not shifted out? + if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + return true; + // Are all the bits to be shifted out known zero? + if (KnownZero.countTrailingOnes() >= ShiftVal) + return isKnownNonZero(X, DL, Depth, Q); + } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } + // PHI + else if (PHINode *PN = dyn_cast<PHINode>(V)) { + // Try and detect a recurrence that monotonically increases from a + // starting value, as these are common as induction variables. 
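A concrete instance of that recurrence (hypothetical values): a positive constant start bumped by a non-negative constant step. The nuw/nsw requirement in the match below is what rules out wrapping back through zero, so this sketch assumes no wrap occurs:

#include <cstdint>

// IV starts positive and only moves up, so while no wrap occurs it can
// never be zero; this is the property the PHI pattern match exploits.
uint64_t inductionAfter(uint64_t Iterations) {
  uint64_t IV = 1;             // Start: constant, positive
  for (uint64_t I = 0; I < Iterations; ++I)
    IV += 3;                   // Step: constant, non-negative (assumed nuw)
  return IV;
}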
+ if (PN->getNumIncomingValues() == 2) { + Value *Start = PN->getIncomingValue(0); + Value *Induction = PN->getIncomingValue(1); + if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start)) + std::swap(Start, Induction); + if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) { + if (!C->isZero() && !C->isNegative()) { + ConstantInt *X; + if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || + match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && + !X->isNegative()) + return true; + } + } + } + } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); @@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return KnownOne != 0; } +/// Return true if V2 == V1 + X, where X is known non-zero. +static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q) { + BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); + if (!BO || BO->getOpcode() != Instruction::Add) + return false; + Value *Op = nullptr; + if (V2 == BO->getOperand(0)) + Op = BO->getOperand(1); + else if (V2 == BO->getOperand(1)) + Op = BO->getOperand(0); + else + return false; + return isKnownNonZero(Op, DL, 0, Q); +} + +/// Return true if it is known that V1 != V2. +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q) { + if (V1->getType()->isVectorTy() || V1 == V2) + return false; + if (V1->getType() != V2->getType()) + // We can't look through casts yet. + return false; + if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q)) + return true; + + if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) { + // Are any known bits in V1 contradictory to known bits in V2? If V1 + // has a known zero where V2 has a known one, they must not be equal. + auto BitWidth = Ty->getBitWidth(); + APInt KnownZero1(BitWidth, 0); + APInt KnownOne1(BitWidth, 0); + computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q); + APInt KnownZero2(BitWidth, 0); + APInt KnownOne2(BitWidth, 0); + computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q); + + auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); + if (OppositeBits.getBoolValue()) + return true; + } + return false; +} + /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. Mask is known to be zero for bits that V /// cannot have. @@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, } // This insert value inserts something else than what we are looking for. - // See if the (aggregrate) value inserted into has the value we are + // See if the (aggregate) value inserted into has the value we are // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_range, @@ -2560,7 +2798,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, } if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { - // If we're extracting a value from an aggregrate that was extracted from + // If we're extracting a value from an aggregate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. @@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); } -/// Return true if Value is always a dereferenceable pointer. 
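Two of the disequality rules introduced above are easy to state in isolation: if V2 == V1 + X with X known non-zero, then V1 != V2; and if some bit is known zero in one value but known one in the other, equality is impossible. The bit rule as a standalone check (knownNonEqualBits is a hypothetical name):

#include <cstdint>

// KZ/KO are the known-zero and known-one masks of each value; a single
// contradictory bit position proves the values differ.
bool knownNonEqualBits(uint64_t KZ1, uint64_t KO1,
                       uint64_t KZ2, uint64_t KO2) {
  return ((KZ1 & KO2) | (KZ2 & KO1)) != 0;
}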
-/// +static bool isAligned(const Value *Base, APInt Offset, unsigned Align, + const DataLayout &DL) { + APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL)); + + if (!BaseAlign) { + Type *Ty = Base->getType()->getPointerElementType(); + if (!Ty->isSized()) + return false; + BaseAlign = DL.getABITypeAlignment(Ty); + } + + APInt Alignment(Offset.getBitWidth(), Align); + + assert(Alignment.isPowerOf2() && "must be a power of 2!"); + return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +} + +static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { + Type *Ty = Base->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(Base, Offset, Align, DL); +} + /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. -static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - SmallPtrSetImpl<const Value *> &Visited) { +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // These are obviously ok. - if (isa<AllocaInst>(V)) return true; + // These are obviously ok if aligned. + if (isa<AllocaInst>(V)) + return isAligned(V, Align, DL); // It's not always safe to follow a bitcast, for example: // bitcast i8* (alloca i8) to i32* @@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, if (STy->isSized() && DTy->isSized() && (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); } // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return !GV->hasExternalWeakLinkage(); + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); // byval arguments are okay. if (const Argument *A = dyn_cast<Argument>(V)) if (A->hasByValAttr()) - return true; - + return isAligned(V, Align, DL); + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) - return true; + return isAligned(V, Align, DL); // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, Type *Ty = VTy->getPointerElementType(); const Value *Base = GEP->getPointerOperand(); - // Conservatively require that the base pointer be fully dereferenceable. + // Conservatively require that the base pointer be fully dereferenceable + // and aligned. if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(Base, DL, CtxI, - DT, TLI, Visited)) + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) return false; - + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); if (!GEP->accumulateConstantOffset(DL, Offset)) return false; - - // Check if the load is within the bounds of the underlying object. 
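Once the base alignment is known, the isAligned helper above reduces to two unsigned checks: the base must be at least as aligned as required, and the accumulated byte offset must be a multiple of the requirement. In plain C++ (isAlignedAccess is a hypothetical name; Align must be a power of two, as the assert enforces):

#include <cstdint>

// True when a (Base + Offset) access is aligned to Align, given the base
// pointer's own alignment BaseAlign; Align is a power of two.
bool isAlignedAccess(uint64_t BaseAlign, uint64_t Offset, uint64_t Align) {
  return BaseAlign >= Align && (Offset & (Align - 1)) == 0;
}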
+ + // Check if the load is within the bounds of the underlying object + // and offset is aligned. uint64_t LoadSize = DL.getTypeStoreSize(Ty); Type *BaseType = Base->getType()->getPointerElementType(); - return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && + !(Offset & APInt(Offset.getBitWidth(), Align-1)); } // For gc.relocate, look through relocations if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V)) if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { GCRelocateOperands RelocateInst(I); - return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); } if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) - return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. Type *VTy = V->getType(); Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + if (Ty->isSized()) { APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - + if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, - CtxI, DT, TLI)) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) return true; } SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited); + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); } bool llvm::isSafeToSpeculativelyExecute(const Value *V, @@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const LoadInst *LI = cast<LoadInst>(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. 
+ LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: - case Instruction::LandingPad: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: + case Instruction::LandingPad: case Instruction::Resume: + case Instruction::CatchSwitch: + case Instruction::CatchPad: + case Instruction::CatchRet: + case Instruction::CleanupPad: + case Instruction::CleanupRet: return false; // Misc instructions which have effects } } +bool llvm::mayBeMemoryDependent(const Instruction &I) { + return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} + /// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + // Alloca never returns null, malloc might. if (isa<AllocaInst>(V)) return true; @@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const Argument *A = dyn_cast<Argument>(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // Global values are not null unless extern weak. + // A global variable in address space 0 is non null unless extern weak. + // Other address spaces may have null as a valid address for a global, + // so we can't assume anything. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->hasExternalWeakLinkage(); + return !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0; // A Load tagged w/nonnull metadata is never null. if (const LoadInst *LI = dyn_cast<LoadInst>(V)) @@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + unsigned NumUsesExplored = 0; for (auto U : V->users()) { // Avoid massive lists @@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } -static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, +static OverflowResult computeOverflowForSignedAdd( + Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + if (Add && Add->hasNoSignedWrap()) { + return OverflowResult::NeverOverflows; + } + + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + + if ((LHSKnownNonNegative && RHSKnownNegative) || + (LHSKnownNegative && RHSKnownNonNegative)) { + // The sign bits are opposite: this CANNOT overflow. + return OverflowResult::NeverOverflows; + } + + // The remaining code needs Add to be available. Early returns if not so. 
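The mixed-signs rule above deserves spelling out: a non-negative value plus a negative one lands strictly between the two operands, so the sum can reach neither signed extreme. A one-function demonstration (mixedSignAddOverflows is a hypothetical name; __builtin_add_overflow is a GCC/Clang builtin):

#include <cstdint>

// When A >= 0 and B < 0 (or vice versa), the mathematical sum already lies
// in [INT64_MIN, INT64_MAX], so the checked add below never reports
// overflow for such inputs.
bool mixedSignAddOverflows(int64_t A, int64_t B) {
  int64_t Sum;
  return __builtin_add_overflow(A, B, &Sum);
}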
+ if (!Add) + return OverflowResult::MayOverflow; + + // If the sign of Add is the same as at least one of the operands, this add + // CANNOT overflow. This is particularly useful when the sum is + // @llvm.assume'ed non-negative rather than proved so from analyzing its + // operands. + bool LHSOrRHSKnownNonNegative = + (LHSKnownNonNegative || RHSKnownNonNegative); + bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { + bool AddKnownNonNegative, AddKnownNegative; + ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, + /*Depth=*/0, AC, CxtI, DT); + if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || + (AddKnownNegative && LHSOrRHSKnownNegative)) { + return OverflowResult::NeverOverflows; + } + } + + return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), + Add, DL, AC, CxtI, DT); +} + +OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +} + +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { + // FIXME: This conservative implementation can be relaxed. E.g. most + // atomic operations are guaranteed to terminate on most platforms + // and most functions terminate. + + return !I->isAtomic() && // atomics may never succeed on some platforms + !isa<CallInst>(I) && // could throw and might not terminate + !isa<InvokeInst>(I) && // might not terminate and could throw to + // non-successor (see bug 24185 for details). + !isa<ResumeInst>(I) && // has no successors + !isa<ReturnInst>(I); // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L) { + // The loop header is guaranteed to be executed for every iteration. + // + // FIXME: Relax this constraint to cover all basic blocks that are + // guaranteed to be executed at every iteration. + if (I->getParent() != L->getHeader()) return false; + + for (const Instruction &LI : *L->getHeader()) { + if (&LI == I) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; + } + llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // These operations all propagate poison unconditionally. Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; + + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; + + case Instruction::Shl: { + // Left shift *by* a poison value is poison. The number of + // positions to shift is unsigned, so no negative values are + // possible there. Left shift by zero places preserves poison. So + // it only remains to consider left shift of poison by a positive + // number of places. 
+ // + // A left shift by a positive number of places leaves the lowest order bit + // non-poisoned. However, if such a shift has a no-wrap flag, then we can + // make the poison operand violate that flag, yielding a fresh full-poison + // value. + auto *OBO = cast<OverflowingBinaryOperator>(I); + return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); + } + + case Instruction::Mul: { + // A multiplication by zero yields a non-poison zero result, so we need to + // rule out zero as an operand. Conservatively, multiplication by a + // non-zero constant is not multiplication by zero. + // + // Multiplication by a non-zero constant can leave some bits + // non-poisoned. For example, a multiplication by 2 leaves the lowest + // order bit unpoisoned. So we need to consider that. + // + // Multiplication by 1 preserves poison. If the multiplication has a + // no-wrap flag, then we can make the poison operand violate that flag + // when multiplied by any integer other than 0 and 1. + auto *OBO = cast<OverflowingBinaryOperator>(I); + if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) { + for (Value *V : OBO->operands()) { + if (auto *CI = dyn_cast<ConstantInt>(V)) { + // A ConstantInt cannot yield poison, so we can assume that it is + // the other operand that is poison. + return !CI->isZero(); + } + } + } + return false; + } + + case Instruction::GetElementPtr: + // A GEP implicitly represents a sequence of additions, subtractions, + // truncations, sign extensions and multiplications. The multiplications + // are by the non-zero sizes of some set of types, so we do not have to be + // concerned with multiplication by zero. If the GEP is in-bounds, then + // these operations are implicitly no-signed-wrap so poison is propagated + // by the arguments above for Add, Sub, Trunc, SExt and Mul. + return cast<GEPOperator>(I)->isInBounds(); + + default: + return false; + } +} + +const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Store: + return cast<StoreInst>(I)->getPointerOperand(); + + case Instruction::Load: + return cast<LoadInst>(I)->getPointerOperand(); + + case Instruction::AtomicCmpXchg: + return cast<AtomicCmpXchgInst>(I)->getPointerOperand(); + + case Instruction::AtomicRMW: + return cast<AtomicRMWInst>(I)->getPointerOperand(); + + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return I->getOperand(1); + + default: + return nullptr; + } +} + +bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { + // We currently only look for uses of poison values within the same basic + // block, as that makes it easier to guarantee that the uses will be + // executed given that PoisonI is executed. + // + // FIXME: Expand this to consider uses beyond the same basic block. To do + // this, look out for the distinction between post-dominance and strong + // post-dominance. + const BasicBlock *BB = PoisonI->getParent(); + + // Set of instructions that we have proved will yield poison if PoisonI + // does. + SmallSet<const Value *, 16> YieldsPoison; + YieldsPoison.insert(PoisonI); + + for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end(); + I != E; ++I) { + if (&*I != PoisonI) { + const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I); + if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) + return false; + } + + // Mark poison that propagates from I through uses of I. 
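The loop below is the whole algorithm in miniature: walk forward from PoisonI, flag everything that would inherit its poison, and succeed the moment a must-execute instruction feeds a flagged value into an operand that may not be poison. A toy standalone model (all names hypothetical; a basic block is flattened into a vector whose instructions name operands by index of earlier entries):

#include <vector>

struct ToyInst {
  std::vector<int> Ops;     // operand indices of earlier instructions
  bool PropagatesPoison;    // like propagatesFullPoison
  int NonPoisonOp;          // operand that must not be poison, or -1
  bool TransfersExecution;  // like isGuaranteedToTransferExecution...
};

bool knownNotFullPoison(const std::vector<ToyInst> &BB, int PoisonI) {
  std::vector<bool> Poison(BB.size(), false);
  Poison[PoisonI] = true;
  for (int I = PoisonI + 1; I < (int)BB.size(); ++I) {
    const ToyInst &Inst = BB[I];
    if (Inst.NonPoisonOp >= 0 && Poison[Inst.Ops[Inst.NonPoisonOp]])
      return true;          // poison here would be UB, so it cannot occur
    if (!Inst.TransfersExecution)
      return false;         // later uses may never execute
    if (Inst.PropagatesPoison)
      for (int Op : Inst.Ops)
        if (Poison[Op]) {
          Poison[I] = true;
          break;
        }
  }
  return false;
}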
+ if (YieldsPoison.count(&*I)) { + for (const User *User : I->users()) { + const Instruction *UserI = cast<Instruction>(User); + if (UserI->getParent() == BB && propagatesFullPoison(UserI)) + YieldsPoison.insert(User); + } + } + } + return false; +} + +static bool isKnownNonNaN(Value *V, FastMathFlags FMF) { + if (FMF.noNaNs()) + return true; + + if (auto *C = dyn_cast<ConstantFP>(V)) + return !C->isNaN(); + return false; +} + +static bool isKnownNonZero(Value *V) { + if (auto *C = dyn_cast<ConstantFP>(V)) + return !C->isZero(); + return false; +} + +static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, + FastMathFlags FMF, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS) { LHS = CmpLHS; RHS = CmpRHS; - // (icmp X, Y) ? X : Y - if (TrueVal == CmpLHS && FalseVal == CmpRHS) { - switch (Pred) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMAX; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMAX; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMIN; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMIN; + // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may + // return inconsistent results between implementations. + // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 + // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) + // Therefore we behave conservatively and only proceed if at least one of the + // operands is known to not be zero, or if we don't care about signed zeroes. + switch (Pred) { + default: break; + case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: + case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: + if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && + !isKnownNonZero(CmpRHS)) + return {SPF_UNKNOWN, SPNB_NA, false}; + } + + SelectPatternNaNBehavior NaNBehavior = SPNB_NA; + bool Ordered = false; + + // When given one NaN and one non-NaN input: + // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. + // - A simple C99 (a < b ? a : b) construction will return 'b' (as the + // ordered comparison fails), which could be NaN or non-NaN. + // so here we discover exactly what NaN behavior is required/accepted. + if (CmpInst::isFPPredicate(Pred)) { + bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); + bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); + + if (LHSSafe && RHSSafe) { + // Both operands are known non-NaN. + NaNBehavior = SPNB_RETURNS_ANY; + } else if (CmpInst::isOrdered(Pred)) { + // An ordered comparison will return false when given a NaN, so it + // returns the RHS. + Ordered = true; + if (LHSSafe) + // LHS is non-NaN, so if RHS is NaN then NaN will be returned. + NaNBehavior = SPNB_RETURNS_NAN; + else if (RHSSafe) + NaNBehavior = SPNB_RETURNS_OTHER; + else + // Completely unsafe. + return {SPF_UNKNOWN, SPNB_NA, false}; + } else { + Ordered = false; + // An unordered comparison will return true when given a NaN, so it + // returns the LHS. + if (LHSSafe) + // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. + NaNBehavior = SPNB_RETURNS_OTHER; + else if (RHSSafe) + NaNBehavior = SPNB_RETURNS_NAN; + else + // Completely unsafe. + return {SPF_UNKNOWN, SPNB_NA, false}; } } - // (icmp X, Y) ? 
Y : X if (TrueVal == CmpRHS && FalseVal == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + if (NaNBehavior == SPNB_RETURNS_NAN) + NaNBehavior = SPNB_RETURNS_OTHER; + else if (NaNBehavior == SPNB_RETURNS_OTHER) + NaNBehavior = SPNB_RETURNS_NAN; + Ordered = !Ordered; + } + + // ([if]cmp X, Y) ? X : Y + if (TrueVal == CmpLHS && FalseVal == CmpRHS) { switch (Pred) { - default: return SPF_UNKNOWN; // Equality. + default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; + case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; } } @@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { - return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS; + return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) { - return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS; + return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } } @@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, match(CmpLHS, m_Not(m_Specific(TrueVal))))) { LHS = TrueVal; RHS = FalseVal; - return SPF_SMIN; + return {SPF_SMIN, SPNB_NA, false}; } } } // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - return SPF_UNKNOWN; + return {SPF_UNKNOWN, SPNB_NA, false}; } -static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2, - Instruction::CastOps *CastOp) { +static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, + Instruction::CastOps *CastOp) { CastInst *CI = dyn_cast<CastInst>(V1); Constant *C = dyn_cast<Constant>(V2); - if (!CI || !C) + CastInst *CI2 = dyn_cast<CastInst>(V2); + if (!CI) return nullptr; *CastOp = CI->getOpcode(); + if (CI2) { + // If V1 and V2 are both the same cast from the same type, we can look + // through V1. 
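The NaN bookkeeping above exists because a select-based floating-point min and a libm min genuinely disagree on NaN inputs, which is exactly what SPNB_RETURNS_NAN and SPNB_RETURNS_OTHER record. A two-function illustration:

#include <cmath>

// An ordered compare is false on any NaN input, so the select takes the
// false arm: selectMin(1.0f, NAN) is NaN, while std::fmin(1.0f, NAN)
// returns the non-NaN operand, 1.0f.
float selectMin(float A, float B) { return A < B ? A : B; }
float libmMin(float A, float B) { return std::fmin(A, B); }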
+ if (CI2->getOpcode() == CI->getOpcode() && + CI2->getSrcTy() == CI->getSrcTy()) + return CI2->getOperand(0); + return nullptr; + } else if (!C) { + return nullptr; + } + if (isa<SExtInst>(CI) && CmpI->isSigned()) { Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy()); // This is only valid if the truncated value can be sign-extended @@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2, if (isa<TruncInst>(CI)) return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); + if (isa<FPToUIInst>(CI)) + return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); + + if (isa<FPToSIInst>(CI)) + return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); + + if (isa<UIToFPInst>(CI)) + return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); + + if (isa<SIToFPInst>(CI)) + return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); + + if (isa<FPTruncInst>(CI)) + return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); + + if (isa<FPExtInst>(CI)) + return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); + return nullptr; } -SelectPatternFlavor llvm::matchSelectPattern(Value *V, +SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp) { SelectInst *SI = dyn_cast<SelectInst>(V); - if (!SI) return SPF_UNKNOWN; + if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; - ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition()); - if (!CmpI) return SPF_UNKNOWN; + CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); + if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; - ICmpInst::Predicate Pred = CmpI->getPredicate(); + CmpInst::Predicate Pred = CmpI->getPredicate(); Value *CmpLHS = CmpI->getOperand(0); Value *CmpRHS = CmpI->getOperand(1); Value *TrueVal = SI->getTrueValue(); Value *FalseVal = SI->getFalseValue(); + FastMathFlags FMF; + if (isa<FPMathOperator>(CmpI)) + FMF = CmpI->getFastMathFlags(); // Bail out early. if (CmpI->isEquality()) - return SPF_UNKNOWN; + return {SPF_UNKNOWN, SPNB_NA, false}; // Deal with type mismatches. 
if (CastOp && CmpLHS->getType() != TrueVal->getType()) { - if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) - return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, + if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, cast<CastInst>(TrueVal)->getOperand(0), C, LHS, RHS); - if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) - return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, + if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, C, cast<CastInst>(FalseVal)->getOperand(0), LHS, RHS); } - return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } + +ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) { + const unsigned NumRanges = Ranges.getNumOperands() / 2; + assert(NumRanges >= 1 && "Must have at least one range!"); + assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs"); + + auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0)); + auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1)); + + ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue()); + + for (unsigned i = 1; i < NumRanges; ++i) { + auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); + auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); + + // Note: unionWith will potentially create a range that contains values not + // contained in any of the original N ranges. + CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue())); + } + + return CR; +} + +/// Return true if "icmp Pred LHS RHS" is always true. +static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, + const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); + if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) + return true; + + switch (Pred) { + default: + return false; + + case CmpInst::ICMP_SLE: { + const APInt *C; + + // LHS s<= LHS +_{nsw} C if C >= 0 + if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) + return !C->isNegative(); + return false; + } + + case CmpInst::ICMP_ULE: { + const APInt *C; + + // LHS u<= LHS +_{nuw} C for any C + if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C)))) + return true; + + // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) + auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X, + const APInt *&CA, const APInt *&CB) { + if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && + match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) + return true; + + // If X & C == 0 then (X | C) == X +_{nuw} C + if (match(A, m_Or(m_Value(X), m_APInt(CA))) && + match(B, m_Or(m_Specific(X), m_APInt(CB)))) { + unsigned BitWidth = CA->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); + + if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) + return true; + } + + return false; + }; + + Value *X; + const APInt *CLHS, *CRHS; + if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) + return CLHS->ule(*CRHS); + + return false; + } + } +} + +/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred +/// ALHS ARHS" is true. 
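Each implication is ultimately discharged by an always-true fact from isTruePredicate above; the simplest is the unsigned one: an addition that carries the nuw flag can only move a value up. In plain arithmetic (uleAfterNuwAdd is a hypothetical name):

#include <cstdint>

// If Y = X + C does not wrap (what nuw guarantees), then X u<= Y for any C.
bool uleAfterNuwAdd(uint64_t X, uint64_t C) {
  uint64_t Y = X + C;   // assumption: no unsigned wrap
  return X <= Y;        // always true under that assumption
}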
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, + Value *ARHS, Value *BLHS, Value *BRHS, + const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + switch (Pred) { + default: + return false; + + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SLE: + return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, + DT); + + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, + DT); + } +} + +bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + assert(LHS->getType() == RHS->getType() && "mismatched type"); + Type *OpTy = LHS->getType(); + assert(OpTy->getScalarType()->isIntegerTy(1)); + + // LHS ==> RHS by definition + if (LHS == RHS) return true; + + if (OpTy->isVectorTy()) + // TODO: extending the code below to handle vectors + return false; + assert(OpTy->isIntegerTy(1) && "implied by above"); + + ICmpInst::Predicate APred, BPred; + Value *ALHS, *ARHS; + Value *BLHS, *BRHS; + + if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || + !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + return false; + + if (APred == BPred) + return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, + CxtI, DT); + + return false; +} diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp index 8c671ef..4b244ec 100644 --- a/contrib/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp @@ -11,13 +11,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; +using namespace llvm::PatternMatch; /// \brief Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all @@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, /// d) call should only reads memory. /// If all these condition is met then return ValidIntrinsicID /// else return not_intrinsic. -llvm::Intrinsic::ID +Intrinsic::ID llvm::checkUnaryFloatSignature(const CallInst &I, Intrinsic::ID ValidIntrinsicID) { if (I.getNumArgOperands() != 1 || @@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I, /// d) call should only reads memory. /// If all these condition is met then return ValidIntrinsicID /// else return not_intrinsic. -llvm::Intrinsic::ID +Intrinsic::ID llvm::checkBinaryFloatSignature(const CallInst &I, Intrinsic::ID ValidIntrinsicID) { if (I.getNumArgOperands() != 2 || @@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I, /// \brief Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns /// its ID, in case it does not found it return not_intrinsic. 
@@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,
 /// \brief Returns intrinsic ID for call.
 /// For the input call instruction it finds the mapping intrinsic and returns
 /// its ID; if none is found, it returns not_intrinsic.
-llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
-                                                const TargetLibraryInfo *TLI) {
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+                                          const TargetLibraryInfo *TLI) {
   // If we have an intrinsic call, check if it is trivially vectorizable.
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
     Intrinsic::ID ID = II->getIntrinsicID();
@@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
       cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
 
   // Walk backwards and try to peel off zeros.
-  while (LastOperand > 1 &&
-         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
     // Find the type we're currently indexing into.
     gep_type_iterator GEPTI = gep_type_begin(Gep);
     std::advance(GEPTI, LastOperand - 1);
@@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
 /// \brief If the argument is a GEP, then returns the operand identified by
 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
 /// operand, it returns that instead.
-llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
-                                      Loop *Lp) {
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
   if (!GEP)
     return Ptr;
@@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
 }
 
 /// \brief If a value has only one user that is a CastInst, return it.
-llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
-  llvm::Value *UniqueCast = nullptr;
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+  Value *UniqueCast = nullptr;
   for (User *U : Ptr->users()) {
     CastInst *CI = dyn_cast<CastInst>(U);
     if (CI && CI->getType() == Ty) {
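Before the stride logic in the next hunk, a quick illustration of getGEPInductionOperand's backward zero-peeling may be useful. A sketch under the same headers; the demo function, the array shape, and the use of an argument as a stand-in induction variable are all invented for illustration:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

unsigned demoInductionOperand() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *F = cast<Function>(M.getOrInsertFunction(
      "f", FunctionType::get(Type::getVoidTy(Ctx),
                             {Type::getInt64Ty(Ctx)}, false)));
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  Value *IV = &*F->arg_begin(); // stands in for a loop induction variable
  Value *A = B.CreateAlloca(ArrayType::get(B.getFloatTy(), 64));
  Value *GEP = B.CreateGEP(A, {B.getInt64(0), IV});

  // Peeling walks backwards from the last operand; %i is not a constant
  // zero, so the loop stops immediately and operand 2 is reported as the
  // induction operand of "gep %A, i64 0, i64 %i".
  return getGEPInductionOperand(cast<GetElementPtrInst>(GEP)); // == 2
}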
@@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
 
 /// \brief Get the stride of a pointer access in a loop. Looks for symbolic
 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
-llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
-                                        Loop *Lp) {
-  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy || PtrTy->isAggregateType())
     return nullptr;
 
   // Try to remove a gep instruction to make the pointer (actually the index
   // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
   // analyzing the pointer, otherwise, we are analyzing the index.
-  llvm::Value *OrigPtr = Ptr;
+  Value *OrigPtr = Ptr;
 
   // The size of the pointer access.
   int64_t PtrAccessSize = 1;
@@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
     if (M->getOperand(0)->getSCEVType() != scConstant)
       return nullptr;
 
-    const APInt &APStepVal =
-        cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+    const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
 
     // Huge step value - give up.
     if (APStepVal.getBitWidth() > 64)
@@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
   if (!U)
     return nullptr;
 
-  llvm::Value *Stride = U->getValue();
+  Value *Stride = U->getValue();
   if (!Lp->isLoopInvariant(Stride))
     return nullptr;
@@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
 /// \brief Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
-llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
   VectorType *VTy = cast<VectorType>(V->getType());
   unsigned Width = VTy->getNumElements();
@@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
   // Extract a value from a vector add operation with a constant zero.
   Value *Val = nullptr; Constant *Con = nullptr;
-  if (match(V,
-            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
-                                      llvm::PatternMatch::m_Constant(Con)))) {
+  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
     if (Constant *Elt = Con->getAggregateElement(EltNo))
       if (Elt->isNullValue())
         return findScalarElement(Val, EltNo);
-  }
 
   // Otherwise, we don't know.
   return nullptr;
 }
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only 2 cases:
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcast a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+  if (auto *C = dyn_cast<Constant>(V))
+    if (isa<VectorType>(V->getType()))
+      return C->getSplatValue();
+
+  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+  if (!ShuffleInst)
+    return nullptr;
+  // All-zero (or undef) shuffle mask elements.
+  for (int MaskElt : ShuffleInst->getShuffleMask())
+    if (MaskElt != 0 && MaskElt != -1)
+      return nullptr;
+  // The first shuffle source is 'insertelement' with index 0.
+  auto *InsertEltInst =
+      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+    return nullptr;
+
+  return InsertEltInst->getOperand(1);
+}
+
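getSplatValue recognizes exactly the insertelement-plus-zero-mask-shufflevector idiom that IRBuilder emits for broadcasts. A hedged sketch of that round trip, assuming this revision's headers; the demo name, vector width, and scaffolding are ours:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

bool demoSplat() {
  LLVMContext Ctx;
  Module M("splat", Ctx);
  auto *F = cast<Function>(M.getOrInsertFunction(
      "f", FunctionType::get(Type::getVoidTy(Ctx),
                             {Type::getFloatTy(Ctx)}, false)));
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  Value *Scalar = &*F->arg_begin();
  Type *VecTy = VectorType::get(B.getFloatTy(), 4);
  // insertelement at index 0, then shufflevector with an all-zero mask:
  // the canonical broadcast sequence the helper looks for.
  Value *Insert = B.CreateInsertElement(UndefValue::get(VecTy), Scalar,
                                        B.getInt32(0));
  Value *Splat = B.CreateShuffleVector(
      Insert, UndefValue::get(VecTy),
      ConstantVector::getSplat(4, B.getInt32(0)));

  return getSplatValue(Splat) == Scalar; // expected to hold
}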
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+                               const TargetTransformInfo *TTI) {
+
+  // DemandedBits will give us every value's live-out bits. But we want
+  // to ensure no extra casts would need to be inserted, so every DAG
+  // of connected values must have the same minimum bitwidth.
+  EquivalenceClasses<Value *> ECs;
+  SmallVector<Value *, 16> Worklist;
+  SmallPtrSet<Value *, 4> Roots;
+  SmallPtrSet<Value *, 16> Visited;
+  DenseMap<Value *, uint64_t> DBits;
+  SmallPtrSet<Instruction *, 4> InstructionSet;
+  MapVector<Instruction *, uint64_t> MinBWs;
+
+  // Determine the roots. We work bottom-up, from truncs or icmps.
+  bool SeenExtFromIllegalType = false;
+  for (auto *BB : Blocks)
+    for (auto &I : *BB) {
+      InstructionSet.insert(&I);
+
+      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+          !TTI->isTypeLegal(I.getOperand(0)->getType()))
+        SeenExtFromIllegalType = true;
+
+      // Only deal with non-vector integers up to 64 bits wide.
+      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+          !I.getType()->isVectorTy() &&
+          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+        // Don't make work for ourselves. If we know the loaded type is legal,
+        // don't add it to the worklist.
+        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+          continue;
+
+        Worklist.push_back(&I);
+        Roots.insert(&I);
+      }
+    }
+  // Early exit.
+  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+    return MinBWs;
+
+  // Now proceed breadth-first, unioning values together.
+  while (!Worklist.empty()) {
+    Value *Val = Worklist.pop_back_val();
+    Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+    if (Visited.count(Val))
+      continue;
+    Visited.insert(Val);
+
+    // Non-instructions terminate a chain successfully.
+    if (!isa<Instruction>(Val))
+      continue;
+    Instruction *I = cast<Instruction>(Val);
+
+    // If we encounter a type that is larger than 64 bits, we can't represent
+    // it so bail out.
+    if (DB.getDemandedBits(I).getBitWidth() > 64)
+      return MapVector<Instruction *, uint64_t>();
+
+    uint64_t V = DB.getDemandedBits(I).getZExtValue();
+    DBits[Leader] |= V;
+
+    // Casts, loads and instructions outside of our range terminate a chain
+    // successfully.
+    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+        !InstructionSet.count(I))
+      continue;
+
+    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+    // transform anything that relies on them.
+    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+        !I->getType()->isIntegerTy()) {
+      DBits[Leader] |= ~0ULL;
+      continue;
+    }
+
+    // We don't modify the types of PHIs. Reductions will already have been
+    // truncated if possible, and inductions' sizes will have been chosen by
+    // indvars.
+    if (isa<PHINode>(I))
+      continue;
+
+    if (DBits[Leader] == ~0ULL)
+      // All bits demanded, no point continuing.
+      continue;
+
+    for (Value *O : cast<User>(I)->operands()) {
+      ECs.unionSets(Leader, O);
+      Worklist.push_back(O);
+    }
+  }
+
+  // Now we've discovered all values, walk them to see if there are
+  // any users we didn't see. If there are, we can't optimize that
+  // chain.
+  for (auto &I : DBits)
+    for (auto *U : I.first->users())
+      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+    uint64_t LeaderDemandedBits = 0;
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+      LeaderDemandedBits |= DBits[*MI];
+
+    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+                     llvm::countLeadingZeros(LeaderDemandedBits);
+    // Round up to a power of 2
+    if (!isPowerOf2_64((uint64_t)MinBW))
+      MinBW = NextPowerOf2(MinBW);
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+      if (!isa<Instruction>(*MI))
+        continue;
+      Type *Ty = (*MI)->getType();
+      if (Roots.count(*MI))
+        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+      if (MinBW < Ty->getScalarSizeInBits())
+        MinBWs[cast<Instruction>(*MI)] = MinBW;
+    }
+  }
+
+  return MinBWs;
+}
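The width computation at the end of computeMinimumValueSizes is the step most worth internalizing: the highest demanded bit across an equivalence class gives a lower bound on the width, which is then rounded up to a power of two. A standalone restatement of just that arithmetic, using only MathExtras.h; the helper name is ours, not part of the commit:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Mirrors the rounding above: 0xFF demands 8 bits and stays at 8, while
// 0x1FF demands 9 bits and is rounded up to 16.
uint64_t minimumWidth(uint64_t DemandedBits) {
  uint64_t MinBW = 64 - llvm::countLeadingZeros(DemandedBits);
  if (!llvm::isPowerOf2_64(MinBW))
    MinBW = llvm::NextPowerOf2(MinBW);
  return MinBW;
}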