Diffstat (limited to 'contrib/llvm/lib/Analysis')
52 files changed, 6442 insertions, 2658 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 6eea817..f54e234 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -37,7 +38,6 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
 // Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) {
   AA->addEscapingUse(U);
 }
 
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+  // We may have two calls
+  if (auto CS = ImmutableCallSite(I)) {
+    // Check if the two calls modify the same memory
+    return getModRefInfo(Call, CS);
+  } else {
+    // Otherwise, check if the call modifies or references the
+    // location this memory access defines.  The best we can say
+    // is that if the call references what this instruction
+    // defines, it must be clobbered by this location.
+    const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+    if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+      return AliasAnalysis::ModRef;
+  }
+  return AliasAnalysis::NoModRef;
+}
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
 
   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
-  if (!alias(getLocation(L), Loc))
+  if (Loc.Ptr && !alias(getLocation(L), Loc))
     return NoModRef;
 
   // Otherwise, a load just reads.
@@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
   if (!S->isUnordered())
     return ModRef;
 
-  // If the store address cannot alias the pointer in question, then the
-  // specified memory cannot be modified by the store.
-  if (!alias(getLocation(S), Loc))
-    return NoModRef;
+  if (Loc.Ptr) {
+    // If the store address cannot alias the pointer in question, then the
+    // specified memory cannot be modified by the store.
+    if (!alias(getLocation(S), Loc))
+      return NoModRef;
 
-  // If the pointer is a pointer to constant memory, then it could not have been
-  // modified by this store.
-  if (pointsToConstantMemory(Loc))
-    return NoModRef;
+    // If the pointer is a pointer to constant memory, then it could not have
+    // been modified by this store.
+    if (pointsToConstantMemory(Loc))
+      return NoModRef;
+
+  }
 
   // Otherwise, a store just writes.
   return Mod;
@@ -359,15 +379,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
-  // If the va_arg address cannot alias the pointer in question, then the
-  // specified memory cannot be accessed by the va_arg.
-  if (!alias(getLocation(V), Loc))
-    return NoModRef;
 
-  // If the pointer is a pointer to constant memory, then it could not have been
-  // modified by this va_arg.
-  if (pointsToConstantMemory(Loc))
-    return NoModRef;
+  if (Loc.Ptr) {
+    // If the va_arg address cannot alias the pointer in question, then the
+    // specified memory cannot be accessed by the va_arg.
+    if (!alias(getLocation(V), Loc))
+      return NoModRef;
+
+    // If the pointer is a pointer to constant memory, then it could not have
+    // been modified by this va_arg.
+    if (pointsToConstantMemory(Loc))
+      return NoModRef;
+  }
 
   // Otherwise, a va_arg reads and writes.
   return ModRef;
@@ -380,7 +403,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
     return ModRef;
 
   // If the cmpxchg address does not alias the location, it does not access it.
-  if (!alias(getLocation(CX), Loc))
+  if (Loc.Ptr && !alias(getLocation(CX), Loc))
     return NoModRef;
 
   return ModRef;
@@ -393,7 +416,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
     return ModRef;
 
   // If the atomicrmw address does not alias the location, it does not access it.
-  if (!alias(getLocation(RMW), Loc))
+  if (Loc.Ptr && !alias(getLocation(RMW), Loc))
     return NoModRef;
 
   return ModRef;
@@ -407,9 +430,10 @@ AliasAnalysis::ModRefResult
 AliasAnalysis::callCapturesBefore(const Instruction *I,
                                   const AliasAnalysis::Location &MemLoc,
                                   DominatorTree *DT) {
-  if (!DT || !DL) return AliasAnalysis::ModRef;
+  if (!DT)
+    return AliasAnalysis::ModRef;
 
-  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
+  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
   if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
       isa<Constant>(Object))
     return AliasAnalysis::ModRef;
@@ -462,10 +486,10 @@ AliasAnalysis::~AliasAnalysis() {}
 /// InitializeAliasAnalysis - Subclasses must call this method to initialize the
 /// AliasAnalysis interface before any other methods are called.
 ///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
-  DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : nullptr;
-  TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
+  DL = NewDL;
+  auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  TLI = TLIP ? &TLIP->getTLI() : nullptr;
   AA = &P->getAnalysis<AliasAnalysis>();
 }
 
@@ -494,7 +518,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
 /// execution of the specified instructions to mod\ref (according to the
 /// mode) the location Loc. The instructions to consider are all
 /// of the instructions in the range of [I1,I2] INCLUSIVE.
-/// I1 and I2 must be in the same basic block. 
+/// I1 and I2 must be in the same basic block.
 bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
                                               const Instruction &I2,
                                               const Location &Loc,
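As an illustrative aside (not part of the patch): the new getModRefInfo(Instruction *, ImmutableCallSite) overload added above lets a client ask whether a call clobbers the memory an instruction defines, without first classifying the instruction. A minimal sketch of such a caller, with a hypothetical helper name, assuming an AliasAnalysis reference obtained from the pass manager of this LLVM vintage:

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Returns true if Call may read or write memory that I defines. Dispatch
  // between the call-vs-call and call-vs-location cases happens inside the
  // new overload.
  static bool callMayClobber(AliasAnalysis &AA, Instruction *I, CallInst *Call) {
    ImmutableCallSite CS(Call);
    return AA.getModRefInfo(I, CS) != AliasAnalysis::NoModRef;
  }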
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
index b860914..a1bfba1 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -43,7 +44,7 @@ namespace {
       errs() << "  " << Val << " " << Desc << " responses ("
              << Val*100/Sum << "%)\n";
     }
-    ~AliasAnalysisCounter() {
+    ~AliasAnalysisCounter() override {
       unsigned AASum = No+May+Partial+Must;
       unsigned MRSum = NoMR+JustRef+JustMod+MR;
       if (AASum + MRSum) { // Print a report if any counted queries occurred...
@@ -76,7 +77,7 @@ namespace {
 
     bool runOnModule(Module &M) override {
       this->M = &M;
-      InitializeAliasAnalysis(this);
+      InitializeAliasAnalysis(this, &M.getDataLayout());
       return false;
     }
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index fe4bd4c..273eacc 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) {
     if (EvalAAMD && isa<StoreInst>(&*I))
       Stores.insert(&*I);
     Instruction &Inst = *I;
-    if (CallSite CS = cast<Value>(&Inst)) {
+    if (auto CS = CallSite(&Inst)) {
       Value *Callee = CS.getCalledValue();
       // Skip actual functions for direct function calls.
       if (!isa<Function>(Callee) && isInterestingPointer(Callee))
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
index 5d61cf9..f98b578 100644
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -44,7 +44,7 @@ namespace {
     }
 
     bool runOnModule(Module &M) override {
-      InitializeAliasAnalysis(this);                    // set up super class
+      InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
 
       for(Module::global_iterator I = M.global_begin(), E = M.global_end();
           I != E; ++I) {
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 45442b0..50890c1 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -182,12 +182,13 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
   return false;
 }
 
-bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
+                                  AliasAnalysis &AA) const {
   if (!Inst->mayReadOrWriteMemory())
     return false;
 
   for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
-    CallSite C1 = getUnknownInst(i), C2 = Inst;
+    ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
     if (!C1 || !C2 ||
         AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
         AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
@@ -242,7 +243,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
 /// containsPointer - Return true if the specified location is represented by
 /// this alias set, false otherwise.  This does not modify the AST object or
 /// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
                                       const AAMDNodes &AAInfo) const {
   for (const_iterator I = begin(), E = end(); I != E; ++I)
     if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
@@ -250,7 +251,7 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
   return false;
 }
 
-bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
   for (const_iterator I = begin(), E = end(); I != E; ++I)
     if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
       return true;
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index f64bf0e..842ff0a 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeCFLAliasAnalysisPass(Registry);
   initializeDependenceAnalysisPass(Registry);
   initializeDelinearizationPass(Registry);
+  initializeDivergenceAnalysisPass(Registry);
   initializeDominanceFrontierPass(Registry);
   initializeDomViewerPass(Registry);
   initializeDomPrinterPass(Registry);
@@ -49,12 +50,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
-  initializeJumpInstrTableInfoPass(Registry);
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
-  initializeLoopInfoPass(Registry);
+  initializeLoopInfoWrapperPassPass(Registry);
   initializeMemDepPrinterPass(Registry);
+  initializeMemDerefPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
   initializePostDominatorTreePass(Registry);
@@ -65,7 +66,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeRegionOnlyPrinterPass(Registry);
   initializeScalarEvolutionPass(Registry);
   initializeScalarEvolutionAliasAnalysisPass(Registry);
-  initializeTargetTransformInfoAnalysisGroup(Registry);
+  initializeTargetTransformInfoWrapperPassPass(Registry);
   initializeTypeBasedAliasAnalysisPass(Registry);
   initializeScopedNoAliasAAPass(Registry);
 }
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index da5ba18..f468a43 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
@@ -73,6 +74,20 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
 #endif
 }
 
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+                                             AnalysisManager<Function> *AM) {
+  AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+  OS << "Cached assumptions for function: " << F.getName() << "\n";
+  for (auto &VH : AC.assumptions())
+    if (VH)
+      OS << "  " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+  return PreservedAnalyses::all();
+}
+
 void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
   auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
   if (I != ACT->AssumptionCaches.end())
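The evaluator and tracker changes above both lean on the CallSite conversion idiom: constructing a CallSite from an arbitrary instruction yields an object that tests false unless the instruction is actually a call or invoke, replacing the old cast<Value> dance. An illustrative sketch of the idiom with a hypothetical helper, assuming this vintage of the LLVM headers:

  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // True iff Inst is a call or invoke with a resolvable callee operand.
  static bool isCallOrInvoke(Instruction &Inst) {
    if (auto CS = CallSite(&Inst)) // false for non-call instructions
      return CS.getCalledValue() != nullptr;
    return false;
  }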
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a9efc5a..a61faca 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -38,7 +39,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
@@ -103,7 +103,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -221,7 +221,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     case Instruction::Or:
       // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
       // analyze it.
-      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+      if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
                              BOp, DT))
         break;
       // FALL THROUGH.
@@ -292,7 +292,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
 static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       bool &MaxLookupReached, const DataLayout *DL,
+                       bool &MaxLookupReached, const DataLayout &DL,
                        AssumptionCache *AC, DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
@@ -341,16 +341,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
     if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
       return V;
 
-    // If we are lacking DataLayout information, we can't compute the offets of
-    // elements computed by GEPs.  However, we can handle bitcast equivalent
-    // GEPs.
-    if (!DL) {
-      if (!GEPOp->hasAllZeroIndices())
-        return V;
-      V = GEPOp->getOperand(0);
-      continue;
-    }
-
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
@@ -363,30 +353,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
           unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
           if (FieldNo == 0) continue;
 
-          BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
+          BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
           continue;
         }
 
         // For an array/pointer, add the element offset, explicitly scaled.
         if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
           if (CIdx->isZero()) continue;
-          BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+          BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
           continue;
         }
 
-        uint64_t Scale = DL->getTypeAllocSize(*GTI);
+        uint64_t Scale = DL.getTypeAllocSize(*GTI);
         ExtensionKind Extension = EK_NotExtended;
 
         // If the integer type is smaller than the pointer size, it is implicitly
         // sign extended to pointer size.
         unsigned Width = Index->getType()->getIntegerBitWidth();
-        if (DL->getPointerSizeInBits(AS) > Width)
+        if (DL.getPointerSizeInBits(AS) > Width)
          Extension = EK_SignExt;
 
        // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
        APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
-                                   *DL, 0, AC, DT);
+       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
+                                   0, AC, DT);
 
        // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
        // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -408,7 +398,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 
         // Make sure that we have a scale that makes sense for this target's
         // pointer size.
-        if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
+        if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
           Scale <<= ShiftBits;
           Scale = (int64_t)Scale >> ShiftBits;
         }
@@ -461,14 +451,12 @@ namespace {
       initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
 
-    void initializePass() override {
-      InitializeAliasAnalysis(this);
-    }
+    bool doInitialization(Module &M) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetLibraryInfo>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
 
     AliasResult alias(const Location &LocA, const Location &LocB) override {
@@ -591,7 +579,7 @@ INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)
@@ -612,7 +600,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
       return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -649,8 +637,8 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
         Visited.clear();
         return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
       }
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-        Worklist.push_back(PN->getIncomingValue(i));
+      for (Value *IncValue : PN->incoming_values())
+        Worklist.push_back(IncValue);
       continue;
     }
 
@@ -706,7 +694,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
     return DoesNotAccessMemory;
 
   // For intrinsics, we can check the table.
-  if (unsigned iid = F->getIntrinsicID()) {
+  if (Intrinsic::ID iid = F->getIntrinsicID()) {
 #define GET_INTRINSIC_MODREF_BEHAVIOR
 #include "llvm/IR/Intrinsics.gen"
 #undef GET_INTRINSIC_MODREF_BEHAVIOR
@@ -718,7 +706,8 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
   if (F->onlyReadsMemory())
     Min = OnlyReadsMemory;
 
-  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   if (isMemsetPattern16(F, TLI))
     Min = OnlyAccessesArgumentPointees;
 
@@ -730,7 +719,8 @@ AliasAnalysis::Location
 BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
                                    ModRefResult &Mask) {
   Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
-  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
   if (II != nullptr)
     switch (II->getIntrinsicID()) {
@@ -813,6 +803,11 @@ static bool isAssumeIntrinsic(ImmutableCallSite CS) {
   return false;
 }
 
+bool BasicAliasAnalysis::doInitialization(Module &M) {
+  InitializeAliasAnalysis(this, &M.getDataLayout());
+  return true;
+}
+
 /// getModRefInfo - Check to see if the specified callsite can clobber the
 /// specified memory object.  Since we only look at local properties of this
 /// function, we really can't say much about this query.  We do, however, use
@@ -823,7 +818,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
 
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
@@ -888,6 +883,99 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
   return AliasAnalysis::getModRefInfo(CS1, CS2);
 }
 
+/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
+/// operators, both having the exact same pointer operand.
+static AliasAnalysis::AliasResult
+aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
+                         const GEPOperator *GEP2, uint64_t V2Size,
+                         const DataLayout &DL) {
+
+  assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+         "Expected GEPs with the same pointer operand");
+
+  // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+  // such that the struct field accesses provably cannot alias.
+  // We also need at least two indices (the pointer, and the struct field).
+  if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+      GEP1->getNumIndices() < 2)
+    return AliasAnalysis::MayAlias;
+
+  // If we don't know the size of the accesses through both GEPs, we can't
+  // determine whether the struct fields accessed can't alias.
+  if (V1Size == AliasAnalysis::UnknownSize ||
+      V2Size == AliasAnalysis::UnknownSize)
+    return AliasAnalysis::MayAlias;
+
+  ConstantInt *C1 =
+      dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+  ConstantInt *C2 =
+      dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+  // If the last (struct) indices aren't constants, we can't say anything.
+  // If they're identical, the other indices might be also be dynamically
+  // equal, so the GEPs can alias.
+  if (!C1 || !C2 || C1 == C2)
+    return AliasAnalysis::MayAlias;
+
+  // Find the last-indexed type of the GEP, i.e., the type you'd get if
+  // you stripped the last index.
+  // On the way, look at each indexed type.  If there's something other
+  // than an array, different indices can lead to different final types.
+  SmallVector<Value *, 8> IntermediateIndices;
+
+  // Insert the first index; we don't need to check the type indexed
+  // through it as it only drops the pointer indirection.
+  assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+  IntermediateIndices.push_back(GEP1->getOperand(1));
+
+  // Insert all the remaining indices but the last one.
+  // Also, check that they all index through arrays.
+  for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+    if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+            GEP1->getSourceElementType(), IntermediateIndices)))
+      return AliasAnalysis::MayAlias;
+    IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+  }
+
+  StructType *LastIndexedStruct =
+      dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
+          GEP1->getSourceElementType(), IntermediateIndices));
+
+  if (!LastIndexedStruct)
+    return AliasAnalysis::MayAlias;
+
+  // We know that:
+  // - both GEPs begin indexing from the exact same pointer;
+  // - the last indices in both GEPs are constants, indexing into a struct;
+  // - said indices are different, hence, the pointed-to fields are different;
+  // - both GEPs only index through arrays prior to that.
+  //
+  // This lets us determine that the struct that GEP1 indexes into and the
+  // struct that GEP2 indexes into must either precisely overlap or be
+  // completely disjoint.  Because they cannot partially overlap, indexing into
+  // different non-overlapping fields of the struct will never alias.
+
+  // Therefore, the only remaining thing needed to show that both GEPs can't
+  // alias is that the fields are not overlapping.
+  const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+  const uint64_t StructSize = SL->getSizeInBytes();
+  const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+  const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+  auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+                                      uint64_t V2Off, uint64_t V2Size) {
+    return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+           ((V2Off + V2Size <= StructSize) ||
+            (V2Off + V2Size - StructSize <= V1Off));
+  };
+
+  if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+      EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+    return AliasAnalysis::NoAlias;
+
+  return AliasAnalysis::MayAlias;
+}
+
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer.  We know that V1 is a GEP, but we don't know
 /// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -947,10 +1035,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, DL, AC2, DT);
+                               GEP2MaxLookupReached, *DL, AC2, DT);
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -979,14 +1067,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
 
     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, DL, AC2, DT);
+                               GEP2MaxLookupReached, *DL, AC2, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -995,6 +1083,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
+
+    // If we know the two GEPs are based off of the exact same pointer (and not
+    // just the same underlying object), see if that tells us anything about
+    // the resulting pointers.
+    if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+      // If we couldn't find anything interesting, don't abandon just yet.
+      if (R != MayAlias)
+        return R;
+    }
+
     // If the max search depth is reached the result is undefined
     if (GEP2MaxLookupReached || GEP1MaxLookupReached)
       return MayAlias;
@@ -1025,7 +1124,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
 
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, AC1, DT);
+                               GEP1MaxLookupReached, *DL, AC1, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
@@ -1094,7 +1193,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       const Value *V = GEP1VariableIndices[i].V;
 
       bool SignKnownZero, SignKnownOne;
-      ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+      ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
                      0, AC1, nullptr, DT);
 
       // Zero-extension widens the variable, and so forces the sign
@@ -1239,8 +1338,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   SmallPtrSet<Value*, 4> UniqueSrc;
   SmallVector<Value*, 4> V1Srcs;
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-    Value *PV1 = PN->getIncomingValue(i);
+  for (Value *PV1 : PN->incoming_values()) {
     if (isa<PHINode>(PV1))
       // If any of the source itself is a PHI, return MayAlias conservatively
       // to avoid compile time explosion. The worst possible case is if both
@@ -1290,6 +1388,11 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   V1 = V1->stripPointerCasts();
   V2 = V2->stripPointerCasts();
 
+  // If V1 or V2 is undef, the result is NoAlias because we can always pick a
+  // value for undef that aliases nothing in the program.
+  if (isa<UndefValue>(V1) || isa<UndefValue>(V2))
+    return NoAlias;
+
   // Are we checking for alias of the same value?
   // Because we look 'through' phi nodes we could look at "Value" pointers from
   // different iterations. We must therefore make sure that this is not the
@@ -1303,8 +1406,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     return NoAlias;  // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
-  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+  const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
+  const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1427,6 +1530,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   if (!Inst)
     return true;
 
+  if (VisitedPhiBBs.empty())
+    return true;
+
   if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
     return false;
 
@@ -1434,7 +1540,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
   // Make sure that the visited phis cannot reach the Value. This ensures that
   // the Values cannot come from different iterations of a potential cycle the
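The field-overlap test at the heart of the new aliasSameBasePointerGEPs above reduces to plain interval arithmetic over the struct layout. A standalone sketch of that check, extracted as ordinary C++ (the struct, its size, and the offsets are hypothetical example values, not taken from the patch):

  // For struct S { int a; int b; }, accesses to s[i].a and s[j].b can never
  // alias: the two fields occupy disjoint byte ranges within every element.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t StructSize = 8;     // sizeof(S), assuming no padding
    const uint64_t AOff = 0, BOff = 4; // offsetof(S, a), offsetof(S, b)
    const uint64_t ASize = 4, BSize = 4; // 4-byte int accesses

    // Mirrors the EltsDontOverlap lambda: the earlier field must end before
    // the later one starts, and the later access must not wrap into the next
    // array element far enough to reach the earlier field again.
    auto eltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
                                        uint64_t V2Off, uint64_t V2Size) {
      return V1Off < V2Off && V1Off + V1Size <= V2Off &&
             ((V2Off + V2Size <= StructSize) ||
              (V2Off + V2Size - StructSize <= V1Off));
    };

    assert(eltsDontOverlap(AOff, ASize, BOff, BSize)); // NoAlias is justified
    return 0;
  }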
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 8ed8e3e..3d819eb 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
     std::string Result;
     raw_string_ostream OS(Result);
 
-    OS << Node->getName().str() << ":";
+    OS << Node->getName() << ":";
     switch (ViewBlockFreqPropagationDAG) {
     case GVDT_Fraction:
       Graph->printBlockFreq(OS, Node);
@@ -108,7 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
 INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
                       "Block Frequency Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
                     "Block Frequency Analysis", true, true)
@@ -123,13 +123,13 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
 
 void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<BranchProbabilityInfo>();
-  AU.addRequired<LoopInfo>();
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
 bool BlockFrequencyInfo::runOnFunction(Function &F) {
   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
-  LoopInfo &LI = getAnalysis<LoopInfo>();
+  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   if (!BFI)
     BFI.reset(new ImplType);
   BFI->doFunction(&F, &BPI, &LI);
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 278073c..456cee1 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
   return true;
 }
 
-/// \brief Get the maximum allowed loop scale.
-///
-/// Gives the maximum number of estimated iterations allowed for a loop.  Very
-/// large numbers cause problems downstream (even within 64-bits).
-static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
-
 /// \brief Compute the loop scale for a loop.
 void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
   // Compute loop scale.
   DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
 
+  // Infinite loops need special handling. If we give the back edge an infinite
+  // mass, they may saturate all the other scales in the function down to 1,
+  // making all the other region temperatures look exactly the same. Choose an
+  // arbitrary scale to avoid these issues.
+  //
+  // FIXME: An alternate way would be to select a symbolic scale which is later
+  // replaced to be the maximum of all computed scales plus 1. This would
+  // appropriately describe the loop as having a large scale, without skewing
+  // the final frequency computation.
+  const Scaled64 InifiniteLoopScale(1, 12);
+
   // LoopScale == 1 / ExitMass
   // ExitMass == HeadMass - BackedgeMass
   BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
 
-  // Block scale stores the inverse of the scale.
-  Loop.Scale = ExitMass.toScaled().inverse();
+  // Block scale stores the inverse of the scale. If this is an infinite loop,
+  // its exit mass will be zero. In this case, use an arbitrary scale for the
+  // loop scale.
+  Loop.Scale =
+      ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
 
   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
                << " - " << Loop.BackedgeMass << ")\n"
                << " - scale = " << Loop.Scale << "\n");
-
-  if (Loop.Scale > getMaxLoopScale()) {
-    Loop.Scale = getMaxLoopScale();
-    DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
-  }
 }
 
 /// \brief Package up a loop.
@@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
                                      const Scaled64 &Min, const Scaled64 &Max) {
   // Scale the Factor to a size that creates integers.  Ideally, integers would
   // be scaled so that Max == UINT64_MAX so that they can be best
-  // differentiated.  However, the register allocator currently deals poorly
-  // with large numbers.  Instead, push Min up a little from 1 to give some
-  // room to differentiate small, unequal numbers.
-  //
-  // TODO: fix issues downstream so that ScalingFactor can be
-  // Scaled64(1,64)/Max.
-  Scaled64 ScalingFactor = Min.inverse();
-  if ((Max / Min).lg() < 60)
+  // differentiated.  However, in the presence of large frequency values, small
+  // frequencies are scaled down to 1, making it impossible to differentiate
+  // small, unequal numbers. When the spread between Min and Max frequencies
+  // fits well within MaxBits, we make the scale be at least 8.
+  const unsigned MaxBits = 64;
+  const unsigned SpreadBits = (Max / Min).lg();
+  Scaled64 ScalingFactor;
+  if (SpreadBits <= MaxBits - 3) {
+    // If the values are small enough, make the scaling factor at least 8 to
+    // allow distinguishing small values.
+    ScalingFactor = Min.inverse();
     ScalingFactor <<= 3;
+  } else {
+    // If the values need more than MaxBits to be represented, saturate small
+    // frequency values down to 1 by using a scaling factor that benefits large
+    // frequency values.
+    ScalingFactor = Scaled64(1, MaxBits) / Max;
+  }
 
   // Translate the floats to integers.
   DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
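The scaling-factor choice in convertFloatingToInteger above can be illustrated with ordinary doubles standing in for Scaled64. A minimal sketch (not part of the patch; the frequency values are made up) showing both branches:

  #include <cmath>
  #include <cstdio>

  int main() {
    const unsigned MaxBits = 64;
    double Min = 0.25, Max = 1024.0; // example block frequencies
    unsigned SpreadBits = (unsigned)std::log2(Max / Min); // ~ (Max / Min).lg()

    double ScalingFactor;
    if (SpreadBits <= MaxBits - 3)
      ScalingFactor = (1.0 / Min) * 8.0;              // Min.inverse() << 3
    else
      ScalingFactor = std::ldexp(1.0, MaxBits) / Max; // Scaled64(1, 64) / Max

    // Here the spread is 12 bits, so the first branch fires and Min maps to 8.
    std::printf("min -> %.0f, max -> %.0f\n", Min * ScalingFactor,
                Max * ScalingFactor); // prints: min -> 8, max -> 32768
    return 0;
  }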
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 2b39d47..091943b 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -28,7 +29,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", "Branch Probability Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", "Branch Probability Analysis", false, true) @@ -114,11 +115,6 @@ static const uint32_t NORMAL_WEIGHT = 16; // Minimum weight of an edge. Please note, that weight is NEVER 0. static const uint32_t MIN_WEIGHT = 1; -static uint32_t getMaxWeightFor(BasicBlock *BB) { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); -} - - /// \brief Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an @@ -184,15 +180,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { if (!WeightsNode) return false; + // Check that the number of successors is manageable. + assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors"); + // Ensure there are weights for all of the successors. Note that the first // operand to the metadata node is a name, not a weight. if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) return false; - // Build up the final weights that will be used in a temporary buffer, but - // don't add them until all weihts are present. Each weight value is clamped - // to [1, getMaxWeightFor(BB)]. - uint32_t WeightLimit = getMaxWeightFor(BB); + // Build up the final weights that will be used in a temporary buffer. + // Compute the sum of all weights to later decide whether they need to + // be scaled to fit in 32 bits. + uint64_t WeightSum = 0; SmallVector<uint32_t, 2> Weights; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { @@ -200,12 +199,26 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i)); if (!Weight) return false; - Weights.push_back( - std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit))); + assert(Weight->getValue().getActiveBits() <= 32 && + "Too many bits for uint32_t"); + Weights.push_back(Weight->getZExtValue()); + WeightSum += Weights.back(); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeWeight(BB, i, Weights[i]); + + // If the sum of weights does not fit in 32 bits, scale every weight down + // accordingly. + uint64_t ScalingFactor = + (WeightSum > UINT32_MAX) ? 
WeightSum / UINT32_MAX + 1 : 1; + + WeightSum = 0; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + uint32_t W = Weights[i] / ScalingFactor; + WeightSum += W; + setEdgeWeight(BB, i, W); + } + assert(WeightSum <= UINT32_MAX && + "Expected weights to scale down to 32 bits"); return true; } @@ -378,6 +391,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!CV) return false; + // If the LHS is the result of AND'ing a value with a single bit bitmask, + // we don't have information about probabilities. + if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) + if (LHS->getOpcode() == Instruction::And) + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) + if (AndRHS->getUniqueInteger().isPowerOf2()) + return false; + bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { @@ -484,7 +505,7 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { } void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesAll(); } @@ -492,31 +513,29 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); assert(PostDominatedByUnreachable.empty()); assert(PostDominatedByColdCall.empty()); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), - E = po_end(&F.getEntryBlock()); - I != E; ++I) { - DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); - if (calcUnreachableHeuristics(*I)) + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) continue; - if (calcMetadataWeights(*I)) + if (calcMetadataWeights(BB)) continue; - if (calcColdCallHeuristics(*I)) + if (calcColdCallHeuristics(BB)) continue; - if (calcLoopBranchHeuristics(*I)) + if (calcLoopBranchHeuristics(BB)) continue; - if (calcPointerHeuristics(*I)) + if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(*I)) + if (calcZeroHeuristics(BB)) continue; - if (calcFloatingPointHeuristics(*I)) + if (calcFloatingPointHeuristics(BB)) continue; - calcInvokeHeuristics(*I); + calcInvokeHeuristics(BB); } PostDominatedByUnreachable.clear(); @@ -546,7 +565,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t PrevSum = Sum; Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; + assert(Sum >= PrevSum); (void) PrevSum; } return Sum; @@ -609,14 +628,17 @@ uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, uint32_t BranchProbabilityInfo:: getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { uint32_t Weight = 0; + bool FoundWeight = false; DenseMap<Edge, uint32_t>::const_iterator MapI; for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) if (*I == Dst) { MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); - if (MapI != Weights.end()) + if (MapI != Weights.end()) { + FoundWeight = true; Weight += MapI->second; + } } - return (Weight == 0) ? DEFAULT_WEIGHT : Weight; + return (!FoundWeight) ? 
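The weight-scaling logic added to calcMetadataWeights above is easy to check in isolation: if the 64-bit sum of the metadata weights exceeds UINT32_MAX, dividing every weight by WeightSum / UINT32_MAX + 1 brings the sum back under 32 bits. A standalone sketch (not part of the patch; the weights are made up):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  int main() {
    // Two weights whose sum (5e9) overflows a uint32_t.
    std::vector<uint32_t> Weights = {4000000000u, 1000000000u};
    uint64_t WeightSum = 0;
    for (uint32_t W : Weights)
      WeightSum += W;

    // Same expression as in the patch: factor is 2 here.
    uint64_t ScalingFactor =
        (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;

    uint64_t ScaledSum = 0;
    for (uint32_t &W : Weights) {
      W = (uint32_t)(W / ScalingFactor);
      ScaledSum += W;
    }
    assert(ScaledSum <= UINT32_MAX && "scaled weights fit in 32 bits");
    return 0;
  }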
DEFAULT_WEIGHT : Weight; } /// Set the edge weight for a given edge specified by PredBlock and an index diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index 89787f8..c86f1f5 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; @@ -111,7 +111,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp index 88bb84a..84b31df 100644 --- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp @@ -43,14 +43,19 @@ #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <forward_list> +#include <memory> #include <tuple> using namespace llvm; +#define DEBUG_TYPE "cfl-aa" + // Try to go from a Value* to a Function*. Never returns nullptr. static Optional<Function *> parentFunctionOfValue(Value *); @@ -74,7 +79,7 @@ static Optional<Value *> getTargetValue(Instruction *); static bool hasUsefulEdges(Instruction *); const StratifiedIndex StratifiedLink::SetSentinel = - std::numeric_limits<StratifiedIndex>::max(); + std::numeric_limits<StratifiedIndex>::max(); namespace { // StratifiedInfo Attribute things. @@ -82,11 +87,13 @@ typedef unsigned StratifiedAttr; LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs; LLVM_CONSTEXPR unsigned AttrAllIndex = 0; LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1; -LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2; +LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2; +LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3; LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex; LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; LLVM_CONSTEXPR StratifiedAttr AttrNone = 0; +LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex; LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone; // \brief StratifiedSets call for knowledge of "direction", so this is how we @@ -141,9 +148,8 @@ struct FunctionInfo { // Lots of functions have < 4 returns. Adjust as necessary. 
SmallVector<Value *, 4> ReturnedValues; - FunctionInfo(StratifiedSets<Value *> &&S, - SmallVector<Value *, 4> &&RV) - : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} + FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) + : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} }; struct CFLAliasAnalysis; @@ -155,7 +161,7 @@ struct FunctionHandle : public CallbackVH { assert(CFLAA != nullptr); } - virtual ~FunctionHandle() {} + ~FunctionHandle() override {} void deleted() override { removeSelfFromCache(); } void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } @@ -183,7 +189,7 @@ public: initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual ~CFLAliasAnalysis() {} + ~CFLAliasAnalysis() override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); @@ -226,14 +232,22 @@ public: // Comparisons between global variables and other constants should be // handled by BasicAA. + // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing + // a GlobalValue and ConstantExpr, but every query needs to have at least + // one Value tied to a Function, and neither GlobalValues nor ConstantExprs + // are. if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { - return MayAlias; + return AliasAnalysis::alias(LocA, LocB); } - return query(LocA, LocB); + AliasResult QueryResult = query(LocA, LocB); + if (QueryResult == MayAlias) + return AliasAnalysis::alias(LocA, LocB); + + return QueryResult; } - void initializePass() override { InitializeAliasAnalysis(this); } + bool doInitialization(Module &M) override; }; void FunctionHandle::removeSelfFromCache() { @@ -256,9 +270,19 @@ public: llvm_unreachable("Unsupported instruction encountered"); } + void visitPtrToIntInst(PtrToIntInst &Inst) { + auto *Ptr = Inst.getOperand(0); + Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown)); + } + + void visitIntToPtrInst(IntToPtrInst &Inst) { + auto *Ptr = &Inst; + Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown)); + } + void visitCastInst(CastInst &Inst) { - Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, - AttrNone)); + Output.push_back( + Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone)); } void visitBinaryOperator(BinaryOperator &Inst) { @@ -281,8 +305,7 @@ public: } void visitPHINode(PHINode &Inst) { - for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) { - Value *Val = Inst.getIncomingValue(I); + for (Value *Val : Inst.incoming_values()) { Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone)); } } @@ -295,8 +318,11 @@ public: } void visitSelectInst(SelectInst &Inst) { - auto *Condition = Inst.getCondition(); - Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone)); + // Condition is not processed here (The actual statement producing + // the condition result is processed elsewhere). For select, the + // condition is evaluated, but not loaded, stored, or assigned + // simply as a result of being the condition of a select. + auto *TrueVal = Inst.getTrueValue(); Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone)); auto *FalseVal = Inst.getFalseValue(); @@ -367,7 +393,7 @@ public: // I put this here to give us an upper bound on time taken by IPA. Is it // really (realistically) needed? Keep in mind that we do have an n^2 algo. 
- if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs) + if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs) return false; // Exit early if we'll fail anyway @@ -419,7 +445,7 @@ public: } if (AddEdge) Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign, - StratifiedAttrs().flip())); + StratifiedAttrs().flip())); } if (Parameters.size() != Arguments.size()) @@ -561,8 +587,7 @@ private: EdgeTypeT Weight; Node Other; - Edge(const EdgeTypeT &W, const Node &N) - : Weight(W), Other(N) {} + Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {} bool operator==(const Edge &E) const { return Weight == E.Weight && Other == E.Other; @@ -725,6 +750,25 @@ static Level directionOfEdgeType(EdgeType); static void buildGraphFrom(CFLAliasAnalysis &, Function *, SmallVectorImpl<Value *> &, NodeMapT &, GraphT &); +// Gets the edges of a ConstantExpr as if it was an Instruction. This +// function also acts on any nested ConstantExprs, adding the edges +// of those to the given SmallVector as well. +static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, + SmallVectorImpl<Edge> &); + +// Given an Instruction, this will add it to the graph, along with any +// Instructions that are potentially only available from said Instruction +// For example, given the following line: +// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 +// addInstructionToGraph would add both the `load` and `getelementptr` +// instructions to the graph appropriately. +static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &, + SmallVectorImpl<Value *> &, NodeMapT &, + GraphT &); + +// Notes whether it would be pointless to add the given Value to our sets. +static bool canSkipAddingToSets(Value *Val); + // Builds the graph + StratifiedSets for a function. static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); @@ -768,13 +812,16 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) { return AttrGlobalIndex; if (auto *Arg = dyn_cast<Argument>(Val)) - if (!Arg->hasNoAliasAttr()) + // Only pointer arguments should have the argument attribute, + // because things can't escape through scalars without us seeing a + // cast, and thus, interaction with them doesn't matter. + if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy()) return argNumberToAttrIndex(Arg->getArgNo()); return NoneType(); } static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) { - if (ArgNum > AttrMaxNumArgs) + if (ArgNum >= AttrMaxNumArgs) return AttrAllIndex; return ArgNum + AttrFirstArgIndex; } @@ -793,6 +840,8 @@ static EdgeType flipWeight(EdgeType Initial) { static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, SmallVectorImpl<Edge> &Output) { + assert(hasUsefulEdges(Inst) && + "Expected instructions to have 'useful' edges"); GetEdgesVisitor v(Analysis, Output); v.visit(Inst); } @@ -809,13 +858,41 @@ static Level directionOfEdgeType(EdgeType Weight) { llvm_unreachable("Incomplete switch coverage"); } -// Aside: We may remove graph construction entirely, because it doesn't really -// buy us much that we don't already have. 
I'd like to add interprocedural -// analysis prior to this however, in case that somehow requires the graph -// produced by this for efficient execution -static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, - SmallVectorImpl<Value *> &ReturnedValues, - NodeMapT &Map, GraphT &Graph) { +static void constexprToEdges(CFLAliasAnalysis &Analysis, + ConstantExpr &CExprToCollapse, + SmallVectorImpl<Edge> &Results) { + SmallVector<ConstantExpr *, 4> Worklist; + Worklist.push_back(&CExprToCollapse); + + SmallVector<Edge, 8> ConstexprEdges; + while (!Worklist.empty()) { + auto *CExpr = Worklist.pop_back_val(); + std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction()); + + if (!hasUsefulEdges(Inst.get())) + continue; + + ConstexprEdges.clear(); + argsToEdges(Analysis, Inst.get(), ConstexprEdges); + for (auto &Edge : ConstexprEdges) { + if (Edge.From == Inst.get()) + Edge.From = CExpr; + else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From)) + Worklist.push_back(Nested); + + if (Edge.To == Inst.get()) + Edge.To = CExpr; + else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To)) + Worklist.push_back(Nested); + } + + Results.append(ConstexprEdges.begin(), ConstexprEdges.end()); + } +} + +static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, + SmallVectorImpl<Value *> &ReturnedValues, + NodeMapT &Map, GraphT &Graph) { const auto findOrInsertNode = [&Map, &Graph](Value *Val) { auto Pair = Map.insert(std::make_pair(Val, GraphT::Node())); auto &Iter = Pair.first; @@ -826,42 +903,86 @@ static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, return Iter->second; }; + // We don't want the edges of most "return" instructions, but we *do* want + // to know what can be returned. + if (isa<ReturnInst>(&Inst)) + ReturnedValues.push_back(&Inst); + + if (!hasUsefulEdges(&Inst)) + return; + SmallVector<Edge, 8> Edges; - for (auto &Bb : Fn->getBasicBlockList()) { - for (auto &Inst : Bb.getInstList()) { - // We don't want the edges of most "return" instructions, but we *do* want - // to know what can be returned. - if (auto *Ret = dyn_cast<ReturnInst>(&Inst)) - ReturnedValues.push_back(Ret); - - if (!hasUsefulEdges(&Inst)) - continue; + argsToEdges(Analysis, &Inst, Edges); + + // In the case of an unused alloca (or similar), edges may be empty. Note + // that it exists so we can potentially answer NoAlias. + if (Edges.empty()) { + auto MaybeVal = getTargetValue(&Inst); + assert(MaybeVal.hasValue()); + auto *Target = *MaybeVal; + findOrInsertNode(Target); + return; + } - Edges.clear(); - argsToEdges(Analysis, &Inst, Edges); + const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) { + auto To = findOrInsertNode(E.To); + auto From = findOrInsertNode(E.From); + auto FlippedWeight = flipWeight(E.Weight); + auto Attrs = E.AdditionalAttrs; + Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs), + std::make_pair(FlippedWeight, Attrs)); + }; - // In the case of an unused alloca (or similar), edges may be empty. Note - // that it exists so we can potentially answer NoAlias. 
- if (Edges.empty()) { - auto MaybeVal = getTargetValue(&Inst); - assert(MaybeVal.hasValue()); - auto *Target = *MaybeVal; - findOrInsertNode(Target); - continue; - } + SmallVector<ConstantExpr *, 4> ConstantExprs; + for (const Edge &E : Edges) { + addEdgeToGraph(E); + if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To)) + ConstantExprs.push_back(Constexpr); + if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From)) + ConstantExprs.push_back(Constexpr); + } - for (const Edge &E : Edges) { - auto To = findOrInsertNode(E.To); - auto From = findOrInsertNode(E.From); - auto FlippedWeight = flipWeight(E.Weight); - auto Attrs = E.AdditionalAttrs; - Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs), - std::make_pair(FlippedWeight, Attrs)); - } - } + for (ConstantExpr *CE : ConstantExprs) { + Edges.clear(); + constexprToEdges(Analysis, *CE, Edges); + std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph); } } +// Aside: We may remove graph construction entirely, because it doesn't really +// buy us much that we don't already have. I'd like to add interprocedural +// analysis prior to this however, in case that somehow requires the graph +// produced by this for efficient execution +static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, + SmallVectorImpl<Value *> &ReturnedValues, + NodeMapT &Map, GraphT &Graph) { + for (auto &Bb : Fn->getBasicBlockList()) + for (auto &Inst : Bb.getInstList()) + addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph); +} + +static bool canSkipAddingToSets(Value *Val) { + // Constants can share instances, which may falsely unify multiple + // sets, e.g. in + // store i32* null, i32** %ptr1 + // store i32* null, i32** %ptr2 + // clearly ptr1 and ptr2 should not be unified into the same set, so + // we should filter out the (potentially shared) instance to + // i32* null. + if (isa<Constant>(Val)) { + bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) || + isa<ConstantStruct>(Val); + // TODO: Because all of these things are constant, we can determine whether + // the data is *actually* mutable at graph building time. This will probably + // come for free/cheap with offset awareness. 
+    bool CanStoreMutableData =
+        isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+    return !CanStoreMutableData;
+  }
+
+  return false;
+}
+
 static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
   NodeMapT Map;
   GraphT Graph;
@@ -893,7 +1014,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
     while (!Worklist.empty()) {
       auto Node = Worklist.pop_back_val();
       auto *CurValue = findValueOrDie(Node);
-      if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+      if (canSkipAddingToSets(CurValue))
         continue;
 
       for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
@@ -902,7 +1023,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
         auto &OtherNode = std::get<1>(EdgeTuple);
         auto *OtherValue = findValueOrDie(OtherNode);
 
-        if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+        if (canSkipAddingToSets(OtherValue))
           continue;
 
         bool Added;
@@ -918,16 +1039,16 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
           break;
         }
 
-        if (Added) {
-          auto Aliasing = Weight.second;
-          if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
-            Aliasing.set(*MaybeCurIndex);
-          if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
-            Aliasing.set(*MaybeOtherIndex);
-          Builder.noteAttributes(CurValue, Aliasing);
-          Builder.noteAttributes(OtherValue, Aliasing);
+        auto Aliasing = Weight.second;
+        if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+          Aliasing.set(*MaybeCurIndex);
+        if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+          Aliasing.set(*MaybeOtherIndex);
+        Builder.noteAttributes(CurValue, Aliasing);
+        Builder.noteAttributes(OtherValue, Aliasing);
+
+        if (Added)
           Worklist.push_back(OtherNode);
-        }
       }
     }
   }
@@ -937,7 +1058,12 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
   // things that were present during construction being present in the graph.
   // So, we add all present arguments here.
   for (auto &Arg : Fn->args()) {
-    Builder.add(&Arg);
+    if (!Builder.add(&Arg))
+      continue;
+
+    auto Attrs = valueToAttrIndex(&Arg);
+    if (Attrs.hasValue())
+      Builder.noteAttributes(&Arg, *Attrs);
   }
 
   return FunctionInfo(Builder.build(), std::move(ReturnedValues));
@@ -964,8 +1090,10 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
   auto MaybeFnA = parentFunctionOfValue(ValA);
   auto MaybeFnB = parentFunctionOfValue(ValB);
   if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
-    llvm_unreachable("Don't know how to extract the parent function "
-                     "from values A or B");
+    // The only times this is known to happen are when globals + InlineAsm
+    // are involved
+    DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+    return AliasAnalysis::MayAlias;
   }
 
   if (MaybeFnA.hasValue()) {
@@ -991,23 +1119,36 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
 
   auto SetA = *MaybeA;
   auto SetB = *MaybeB;
-
-  if (SetA.Index == SetB.Index)
-    return AliasAnalysis::PartialAlias;
-
   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
   // Stratified set attributes are used as markers to signify whether a member
-  // of a StratifiedSet (or a member of a set above the current set) has
+  // of a StratifiedSet (or a member of a set above the current set) has
   // interacted with either arguments or globals. "Interacted with" meaning
-  // its value may be different depending on the value of an argument or
+  // its value may be different depending on the value of an argument or
   // global.
  // The thought behind this is that, because arguments and globals
   // may alias each other, if AttrsA and AttrsB have touched args/globals,
-  // we must conservatively say that they alias. However, if at least one of
-  // the sets has no values that could legally be altered by changing the value
+  // we must conservatively say that they alias. However, if at least one of
+  // the sets has no values that could legally be altered by changing the value
   // of an argument or global, then we don't have to be as conservative.
   if (AttrsA.any() && AttrsB.any())
     return AliasAnalysis::MayAlias;
 
+  // We currently unify things even if the accesses to them may not be in
+  // bounds, so we can't return partial alias here because we don't know
+  // whether the pointer is really within the object or not.
+  // IE Given an out of bounds GEP and an alloca'd pointer, we may unify
+  // the two. We can't return partial alias for this case, and since we do
+  // not currently track enough information to differentiate the two, we
+  // conservatively answer MayAlias whenever the sets match.
+
+  if (SetA.Index == SetB.Index)
+    return AliasAnalysis::MayAlias;
+
+  return AliasAnalysis::NoAlias;
 }
+
+bool CFLAliasAnalysis::doInitialization(Module &M) {
+  InitializeAliasAnalysis(this, &M.getDataLayout());
+  return true;
+}
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index fa5683c..46a2c43 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "code-metrics"
 
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index fd8f2ae..2f4c6a9 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/config.h"
 #include "llvm/IR/Constants.h"
@@ -33,7 +34,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include <cerrno>
 #include <cmath>
 
@@ -50,8 +50,7 @@ using namespace llvm;
 /// Constant fold bitcast, symbolically evaluating it with DataLayout.
 /// This always returns a non-null constant, but it may be a
 /// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, Type *DestTy,
-                             const DataLayout &TD) {
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
   // Catch the obvious splat cases.
   if (C->isNullValue() && !DestTy->isX86_MMXTy())
     return Constant::getNullValue(DestTy);
@@ -84,11 +83,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
 
     // Now that we know that the input value is a vector of integers, just shift
     // and insert them into our result.
-    unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+    unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
     APInt Result(IT->getBitWidth(), 0);
     for (unsigned i = 0; i != NumSrcElts; ++i) {
       Result <<= BitShift;
-      if (TD.isLittleEndian())
+      if (DL.isLittleEndian())
         Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
       else
         Result |= CDV->getElementAsInteger(i);
@@ -106,7 +105,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
     // vector so the code below can handle it uniformly.
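    // (Editorial example, not from the patch: bitcasting the scalar i64 42
    // to <2 x i32> is handled by first rewriting it as a bitcast of the
    // one-element vector <1 x i64> <i64 42>, so only the vector-to-vector
    // splitting logic below is needed.)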
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { Constant *Ops = C; // don't take the address of C! - return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); + return FoldBitCast(ConstantVector::get(Ops), DestTy, DL); } // If this is a bitcast from constant vector -> vector, fold it. @@ -138,7 +137,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Type *DestIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. - C = FoldBitCast(C, DestIVTy, TD); + C = FoldBitCast(C, DestIVTy, DL); // Finally, IR can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); @@ -162,7 +161,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // of the same size, and that their #elements is not the same. Do the // conversion here, which depends on whether the input or output has // more elements. - bool isLittleEndian = TD.isLittleEndian(); + bool isLittleEndian = DL.isLittleEndian(); SmallVector<Constant*, 32> Result; if (NumDstElt < NumSrcElt) { @@ -198,7 +197,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) unsigned Ratio = NumDstElt/NumSrcElt; - unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy); + unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy); // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { @@ -235,10 +234,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// If this constant is a constant offset from a global, return the global and /// the constant. Because of constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - APInt &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &DL) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { - unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } @@ -251,22 +250,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast || CE->getOpcode() == Instruction::AddrSpaceCast) - return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) GEPOperator *GEP = dyn_cast<GEPOperator>(CE); if (!GEP) return false; - unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType()); + unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. - if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD)) + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL)) return false; // Otherwise, add any offset that our operands provide. - if (!GEP->accumulateConstantOffset(TD, TmpOffset)) + if (!GEP->accumulateConstantOffset(DL, TmpOffset)) return false; Offset = TmpOffset; @@ -276,11 +275,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, /// Recursive helper to read bits out of global. C is the constant being copied /// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy /// results into and BytesLeft is the number of bytes left in -/// the CurPtr buffer. TD is the target data. +/// the CurPtr buffer. 
DL is the DataLayout. static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, unsigned BytesLeft, - const DataLayout &TD) { - assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + const DataLayout &DL) { + assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) && "Out of range access"); // If this element is zero or undefined, we can just return since *CurPtr is @@ -298,7 +297,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { int n = ByteOffset; - if (!TD.isLittleEndian()) + if (!DL.isLittleEndian()) n = IntBytes - n - 1; CurPtr[i] = (unsigned char)(Val >> (n * 8)); ++ByteOffset; @@ -308,22 +307,22 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { if (CFP->getType()->isDoubleTy()) { - C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); - return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } if (CFP->getType()->isFloatTy()){ - C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); - return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } if (CFP->getType()->isHalfTy()){ - C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); - return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } return false; } if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { - const StructLayout *SL = TD.getStructLayout(CS->getType()); + const StructLayout *SL = DL.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); uint64_t CurEltOffset = SL->getElementOffset(Index); ByteOffset -= CurEltOffset; @@ -331,11 +330,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, while (1) { // If the element access is to the element itself and not to tail padding, // read the bytes from the element. 
- uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType()); if (ByteOffset < EltSize && !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, - BytesLeft, TD)) + BytesLeft, DL)) return false; ++Index; @@ -362,7 +361,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || isa<ConstantDataSequential>(C)) { Type *EltTy = C->getType()->getSequentialElementType(); - uint64_t EltSize = TD.getTypeAllocSize(EltTy); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; uint64_t NumElts; @@ -373,7 +372,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, - BytesLeft, TD)) + BytesLeft, DL)) return false; uint64_t BytesWritten = EltSize - Offset; @@ -390,9 +389,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) { + CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) { return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, - BytesLeft, TD); + BytesLeft, DL); } } @@ -401,7 +400,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, - const DataLayout &TD) { + const DataLayout &DL) { PointerType *PTy = cast<PointerType>(C->getType()); Type *LoadTy = PTy->getElementType(); IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); @@ -423,14 +422,13 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, MapTy = Type::getInt64PtrTy(C->getContext(), AS); else if (LoadTy->isVectorTy()) { MapTy = PointerType::getIntNPtrTy(C->getContext(), - TD.getTypeAllocSizeInBits(LoadTy), - AS); + DL.getTypeAllocSizeInBits(LoadTy), AS); } else return nullptr; - C = FoldBitCast(C, MapTy, TD); - if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) - return FoldBitCast(Res, LoadTy, TD); + C = FoldBitCast(C, MapTy, DL); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL)) + return FoldBitCast(Res, LoadTy, DL); return nullptr; } @@ -440,7 +438,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, GlobalValue *GVal; APInt Offset; - if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL)) return nullptr; GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); @@ -455,16 +453,16 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're not accessing anything in this constant, the result is undefined. 
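   // (Editorial example: for a global whose initializer occupies 8 bytes,
   // a load starting at byte offset 8 or beyond touches no initialized
   // bytes, so the fold below simply produces undef.)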
if (Offset.getZExtValue() >= - TD.getTypeAllocSize(GV->getInitializer()->getType())) + DL.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, - BytesLoaded, TD)) + BytesLoaded, DL)) return nullptr; APInt ResultVal = APInt(IntType->getBitWidth(), 0); - if (TD.isLittleEndian()) { + if (DL.isLittleEndian()) { ResultVal = RawBytes[BytesLoaded - 1]; for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; @@ -482,9 +480,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, } static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, - const DataLayout *DL) { - if (!DL) - return nullptr; + const DataLayout &DL) { auto *DestPtrTy = dyn_cast<PointerType>(CE->getType()); if (!DestPtrTy) return nullptr; @@ -499,7 +495,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, // If the type sizes are the same and a cast is legal, just directly // cast the constant. - if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) { + if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) { Instruction::CastOps Cast = Instruction::BitCast; // If we are going from a pointer to int or vice versa, we spell the cast // differently. @@ -530,7 +526,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, /// Return the value that a load from C would produce if it is constant and /// determinable. If this is not determinable, return null. Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, - const DataLayout *TD) { + const DataLayout &DL) { // First, try the easy cases: if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) if (GV->isConstant() && GV->hasDefinitiveInitializer()) @@ -552,13 +548,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } if (CE->getOpcode() == Instruction::BitCast) - if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD)) + if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL)) return LoadedC; // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; - if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) { + if (getConstantStringInfo(CE, Str) && !Str.empty()) { unsigned StrLen = Str.size(); Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); @@ -568,7 +564,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) { APInt StrVal(NumBits, 0); APInt SingleChar(NumBits, 0); - if (TD->isLittleEndian()) { + if (DL.isLittleEndian()) { for (signed i = StrLen-1; i >= 0; i--) { SingleChar = (uint64_t) Str[i] & UCHAR_MAX; StrVal = (StrVal << 8) | SingleChar; @@ -593,7 +589,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If this load comes from anywhere in a constant global, and if the global // is all undef or zero, we know what it loads. if (GlobalVariable *GV = - dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { + dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { Type *ResTy = cast<PointerType>(C->getType())->getElementType(); if (GV->getInitializer()->isNullValue()) @@ -604,16 +600,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } // Try hard to fold loads from bitcasted strange and non-type-safe things. 
- if (TD) - return FoldReinterpretLoadFromConstPtr(CE, *TD); - return nullptr; + return FoldReinterpretLoadFromConstPtr(CE, DL); } -static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ +static Constant *ConstantFoldLoadInst(const LoadInst *LI, + const DataLayout &DL) { if (LI->isVolatile()) return nullptr; if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) - return ConstantFoldLoadFromConstPtr(C, TD); + return ConstantFoldLoadFromConstPtr(C, DL); return nullptr; } @@ -623,16 +618,16 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// these together. If target data info is available, it is provided as DL, /// otherwise DL is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *DL){ + Constant *Op1, + const DataLayout &DL) { // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute // bits. - - if (Opc == Instruction::And && DL) { - unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); + if (Opc == Instruction::And) { + unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); computeKnownBits(Op0, KnownZero0, KnownOne0, DL); @@ -655,14 +650,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && DL) { + if (Opc == Instruction::Sub) { GlobalValue *GV1, *GV2; APInt Offs1, Offs2; - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && - GV1 == GV2) { - unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) { + unsigned OpSize = DL.getTypeSizeInBits(Op0->getType()); // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. // PtrToInt may change the bitwidth so we have convert to the right size @@ -677,21 +671,19 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. 
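 /// (Editorial example, assuming 64-bit pointers: in a constant
 ///   getelementptr [4 x i32], [4 x i32]* @a, i16 0, i16 3
 /// the i16 indices would be rewritten here as i64 0 and i64 3 before any
 /// offset arithmetic is attempted.)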
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, - Type *ResultTy, const DataLayout *TD, +static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TD) - return nullptr; - - Type *IntPtrTy = TD->getIntPtrType(ResultTy); + Type *IntPtrTy = DL.getIntPtrType(ResultTy); bool Any = false; SmallVector<Constant*, 32> NewIdxs; for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( - Ops[0]->getType(), - Ops.slice(1, i - 1)))) && + cast<PointerType>(Ops[0]->getType()->getScalarType()) + ->getElementType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -706,9 +698,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; } @@ -732,15 +724,15 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { } /// If we can symbolically evaluate the GEP constant expression, do so. -static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, - Type *ResultTy, const DataLayout *TD, +static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; - if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || + if (!Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) return nullptr; - Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); + Type *IntPtrTy = DL.getIntPtrType(Ptr->getType()); Type *ResultElementTy = ResultTy->getPointerElementType(); // If this is a constant expr gep that is effectively computing an @@ -760,19 +752,19 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Res = ConstantExpr::getSub(Res, CE->getOperand(1)); Res = ConstantExpr::getIntToPtr(Res, ResultTy); if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res)) - Res = ConstantFoldConstantExpression(ResCE, TD, TLI); + Res = ConstantFoldConstantExpression(ResCE, DL, TLI); return Res; } } return nullptr; } - unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); + unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); APInt Offset = - APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), - makeArrayRef((Value *const*) - Ops.data() + 1, - Ops.size() - 1))); + APInt(BitWidth, + DL.getIndexedOffset( + Ptr->getType(), + makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1))); Ptr = StripPtrCastKeepAS(Ptr); // If this is a GEP of a GEP, fold it all into a single GEP. @@ -790,8 +782,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, break; Ptr = cast<Constant>(GEP->getOperand(0)); - Offset += APInt(BitWidth, - TD->getIndexedOffset(Ptr->getType(), NestedOps)); + Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps)); Ptr = StripPtrCastKeepAS(Ptr); } @@ -831,7 +822,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, } // Determine which element of the array the offset points into. 
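        // (Editorial worked example: for [8 x i32], ElemSize is 4; a byte
        // Offset of 10 yields index 10 / 4 = 2, leaving 10 % 4 = 2 bytes of
        // offset to be consumed by the next level of the indexed type.)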
- APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType())); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -850,7 +841,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // can't re-form this GEP in a regular form, so bail out. The pointer // operand likely went through casts that are necessary to make the GEP // sensible. - const StructLayout &SL = *TD->getStructLayout(STy); + const StructLayout &SL = *DL.getStructLayout(STy); if (Offset.uge(SL.getSizeInBytes())) break; @@ -875,14 +866,14 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return nullptr; // Create a GEP. - Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); // If we ended up indexing a member with a type that doesn't match // the type of what the original indices indexed, add a cast. if (Ty != ResultElementTy) - C = FoldBitCast(C, ResultTy, *TD); + C = FoldBitCast(C, ResultTy, DL); return C; } @@ -898,15 +889,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. -Constant *llvm::ConstantFoldInstruction(Instruction *I, - const DataLayout *TD, +Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast<PHINode>(I)) { Constant *CommonValue = nullptr; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = PN->getIncomingValue(i); + for (Value *Incoming : PN->incoming_values()) { // If the incoming value is undef then skip it. Note that while we could // skip the value if it is equal to the phi node itself we choose not to // because that would break the rule that constant folding only applies if @@ -919,7 +908,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, return nullptr; // Fold the PHI's operands. if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(NewC, TD, TLI); + C = ConstantFoldConstantExpression(NewC, DL, TLI); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) @@ -942,17 +931,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // Fold the Instruction's operands. 
    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
-      Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+      Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
 
     Ops.push_back(Op);
   }
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD, TLI);
+                                           DL, TLI);
 
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
-    return ConstantFoldLoadInst(LI, TD);
+    return ConstantFoldLoadInst(LI, DL);
 
   if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
     return ConstantExpr::getInsertValue(
@@ -967,11 +956,11 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
                                         EVI->getIndices());
   }
 
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
 }
 
 static Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
                                    const TargetLibraryInfo *TLI,
                                    SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
   SmallVector<Constant *, 8> Ops;
@@ -982,25 +971,25 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
     // a ConstantExpr, we don't have to process it again.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
       if (FoldedOps.insert(NewCE).second)
-        NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+        NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
     }
     Ops.push_back(NewC);
   }
 
   if (CE->isCompare())
     return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
-                                           TD, TLI);
-  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+                                           DL, TLI);
+  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
 }
 
 /// Attempt to fold the constant expression
 /// using the specified DataLayout. If successful, the constant result is
 /// returned; if not, null is returned.
 Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const DataLayout *TD,
+                                               const DataLayout &DL,
                                                const TargetLibraryInfo *TLI) {
   SmallPtrSet<ConstantExpr *, 4> FoldedOps;
-  return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+  return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
 }
 
 /// Attempt to constant fold an instruction with the
@@ -1015,12 +1004,12 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
 ///
 Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
                                          ArrayRef<Constant *> Ops,
-                                         const DataLayout *TD,
+                                         const DataLayout &DL,
                                          const TargetLibraryInfo *TLI) {
   // Handle easy binops first.
   if (Instruction::isBinaryOp(Opcode)) {
     if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
-      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
         return C;
     }
 
@@ -1040,10 +1029,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   // If the input is an inttoptr, eliminate the pair. This requires knowing
   // the width of a pointer, so it can't be done in ConstantExpr::getCast.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
-      if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+    if (CE->getOpcode() == Instruction::IntToPtr) {
       Constant *Input = CE->getOperand(0);
       unsigned InWidth = Input->getType()->getScalarSizeInBits();
-      unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+      unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
       if (PtrWidth < InWidth) {
         Constant *Mask =
           ConstantInt::get(CE->getContext(),
@@ -1061,15 +1050,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   // This requires knowing the width of a pointer, so it can't be done in
   // ConstantExpr::getCast.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
-      if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+    if (CE->getOpcode() == Instruction::PtrToInt) {
       Constant *SrcPtr = CE->getOperand(0);
-      unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+      unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
       unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
       if (MidIntSize >= SrcPtrSize) {
         unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
         if (SrcAS == DestTy->getPointerAddressSpace())
-          return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+          return FoldBitCast(CE->getOperand(0), DestTy, DL);
       }
     }
   }
@@ -1087,9 +1076,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   case Instruction::AddrSpaceCast:
     return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
   case Instruction::BitCast:
-    if (TD)
-      return FoldBitCast(Ops[0], DestTy, *TD);
-    return ConstantExpr::getBitCast(Ops[0], DestTy);
+    return FoldBitCast(Ops[0], DestTy, DL);
   case Instruction::Select:
     return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
   case Instruction::ExtractElement:
     return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
   case Instruction::InsertElement:
     return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
-  case Instruction::GetElementPtr:
-    if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+  case Instruction::GetElementPtr: {
+    Type *SrcTy = nullptr;
+    if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
       return C;
-    if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+    if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
       return C;
 
-    return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+    return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+  }
   }
 }
 
@@ -1113,43 +1102,44 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
 /// returns a constant expression of the specified operands.
 Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                 Constant *Ops0, Constant *Ops1,
-                                                const DataLayout *TD,
+                                                const DataLayout &DL,
                                                 const TargetLibraryInfo *TLI) {
   // fold: icmp (inttoptr x), null  -> icmp x, 0
   // fold: icmp (ptrtoint x), 0 -> icmp x, null
   // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
   // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
   //
-  // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+  // FIXME: The following comment is out of date; the DataLayout is available
+  // here now.
+  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
   // around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { - if (TD && Ops1->isNullValue()) { + if (Ops1->isNullValue()) { if (CE0->getOpcode() == Instruction::IntToPtr) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); } // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. if (CE0->getOpcode() == Instruction::PtrToInt) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); if (CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); } } } if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { - if (TD && CE0->getOpcode() == CE1->getOpcode()) { + if (CE0->getOpcode() == CE1->getOpcode()) { if (CE0->getOpcode() == Instruction::IntToPtr) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. @@ -1157,20 +1147,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); - return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI); } // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. if (CE0->getOpcode() == Instruction::PtrToInt) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); if (CE0->getType() == IntPtrTy && CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { - return ConstantFoldCompareInstOperands(Predicate, - CE0->getOperand(0), - CE1->getOperand(0), - TD, - TLI); + return ConstantFoldCompareInstOperands( + Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI); } } } @@ -1180,16 +1167,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { - Constant *LHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, - TD, TLI); - Constant *RHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, - TD, TLI); + Constant *LHS = ConstantFoldCompareInstOperands( + Predicate, CE0->getOperand(0), Ops1, DL, TLI); + Constant *RHS = ConstantFoldCompareInstOperands( + Predicate, CE0->getOperand(1), Ops1, DL, TLI); unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; - return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI); } } @@ -1451,26 +1436,16 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, default: break; case Intrinsic::fabs: return ConstantFoldFP(fabs, V, Ty); -#if HAVE_LOG2 case Intrinsic::log2: - return ConstantFoldFP(log2, V, Ty); -#endif -#if HAVE_LOG + return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log: return ConstantFoldFP(log, V, Ty); -#endif -#if HAVE_LOG10 case Intrinsic::log10: return ConstantFoldFP(log10, V, Ty); -#endif -#if HAVE_EXP case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); -#endif -#if HAVE_EXP2 case Intrinsic::exp2: return ConstantFoldFP(exp2, V, Ty); -#endif case Intrinsic::floor: return ConstantFoldFP(floor, V, Ty); case Intrinsic::ceil: @@ -1568,8 +1543,8 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; - APFloat::opStatus status = - Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + APFloat::opStatus status = Val.convert( + Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); // Conversion is always precise. (void)status; diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 1b74f8c..b529c1a 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -83,7 +83,8 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool CostModelAnalysis::runOnFunction(Function &F) { this->F = &F; - TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); + TTI = TTIWP ? 
&TTIWP->getTTI(F) : nullptr; return false; } diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp index 9334ceb..d603b7b 100644 --- a/contrib/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -59,14 +59,14 @@ public: void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<ScalarEvolution>(); } bool Delinearization::runOnFunction(Function &F) { this->F = &F; SE = &getAnalysis<ScalarEvolution>(); - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); return false; } @@ -141,7 +141,7 @@ char Delinearization::ID = 0; static const char delinearization_name[] = "Delinearization"; INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, true) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index 092df5c..808a38b 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -52,6 +52,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -59,6 +60,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -114,7 +116,7 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", "Dependence Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(DependenceAnalysis, "da", @@ -132,7 +134,7 @@ bool DependenceAnalysis::runOnFunction(Function &F) { this->F = &F; AA = &getAnalysis<AliasAnalysis>(); SE = &getAnalysis<ScalarEvolution>(); - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); return false; } @@ -145,7 +147,7 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<AliasAnalysis>(); AU.addRequiredTransitive<ScalarEvolution>(); - AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<LoopInfoWrapperPass>(); } @@ -225,13 +227,11 @@ bool Dependence::isScalar(unsigned level) const { //===----------------------------------------------------------------------===// // FullDependence methods -FullDependence::FullDependence(Instruction *Source, - Instruction *Destination, +FullDependence::FullDependence(Instruction *Source, Instruction *Destination, bool PossiblyLoopIndependent, - unsigned CommonLevels) : - Dependence(Source, Destination), - Levels(CommonLevels), - LoopIndependent(PossiblyLoopIndependent) { + unsigned CommonLevels) + : Dependence(Source, Destination), Levels(CommonLevels), + 
LoopIndependent(PossiblyLoopIndependent) { Consistent = true; DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; } @@ -625,14 +625,12 @@ void Dependence::dump(raw_ostream &OS) const { OS << "!\n"; } - - -static -AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, - const Value *A, - const Value *B) { - const Value *AObj = GetUnderlyingObject(A); - const Value *BObj = GetUnderlyingObject(B); +static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, + const DataLayout &DL, + const Value *A, + const Value *B) { + const Value *AObj = GetUnderlyingObject(A, DL); + const Value *BObj = GetUnderlyingObject(B, DL); return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), BObj, AA->getTypeStoreSize(BObj->getType())); } @@ -832,6 +830,14 @@ bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, return isLoopInvariant(Src, LoopNest); const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*SE); + const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(UB)) { + if (SE->getTypeSizeInBits(Start->getType()) < + SE->getTypeSizeInBits(UB->getType())) { + if (!AddRec->getNoWrapFlags()) + return false; + } + } if (!isLoopInvariant(Step, LoopNest)) return false; Loops.set(mapSrcLoop(AddRec->getLoop())); @@ -850,6 +856,14 @@ bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, return isLoopInvariant(Dst, LoopNest); const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*SE); + const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(UB)) { + if (SE->getTypeSizeInBits(Start->getType()) < + SE->getTypeSizeInBits(UB->getType())) { + if (!AddRec->getNoWrapFlags()) + return false; + } + } if (!isLoopInvariant(Step, LoopNest)) return false; Loops.set(mapDstLoop(AddRec->getLoop())); @@ -944,13 +958,15 @@ bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, // All subscripts are all the same type. // Loop bound may be smaller (e.g., a char). // Should zero extend loop bound, since it's always >= 0. -// This routine collects upper bound and extends if needed. +// This routine collects upper bound and extends or truncates if needed. +// Truncating is safe when subscripts are known not to wrap. Cases without +// nowrap flags should have been rejected earlier. // Return null if no bound available. const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, Type *T) const { if (SE->hasLoopInvariantBackedgeTakenCount(L)) { const SCEV *UB = SE->getBackedgeTakenCount(L); - return SE->getNoopOrZeroExtend(UB, T); + return SE->getTruncateOrZeroExtend(UB, T); } return nullptr; } @@ -3314,7 +3330,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, Value *SrcPtr = getPointerOperand(Src); Value *DstPtr = getPointerOperand(Dst); - switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { + switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, + SrcPtr)) { case AliasAnalysis::MayAlias: case AliasAnalysis::PartialAlias: // cannot analyse objects if we don't understand their aliasing. 
@@ -3347,9 +3364,9 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); - UsefulGEP = - isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && - isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && + (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); } unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector<Subscript, 4> Pair(Pairs); @@ -3472,8 +3489,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, LI->getLoopFor(Dst->getParent()), Pair[SI].Loops); Result.Consistent = false; - } - else if (Pair[SI].Classification == Subscript::ZIV) { + } else if (Pair[SI].Classification == Subscript::ZIV) { // always separable Separable.set(SI); } @@ -3525,8 +3541,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << ", SIV\n"); unsigned Level; const SCEV *SplitIter = nullptr; - if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, - Result, NewConstraint, SplitIter)) + if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, + SplitIter)) return nullptr; break; } @@ -3574,8 +3590,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, unsigned Level; const SCEV *SplitIter = nullptr; DEBUG(dbgs() << "SIV\n"); - if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, - Result, NewConstraint, SplitIter)) + if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, + SplitIter)) return nullptr; ConstrainedLevels.set(Level); if (intersectConstraints(&Constraints[Level], &NewConstraint)) { @@ -3651,8 +3667,10 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, // update Result.DV from constraint vector DEBUG(dbgs() << " updating\n"); - for (int SJ = ConstrainedLevels.find_first(); - SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { + for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; + SJ = ConstrainedLevels.find_next(SJ)) { + if (SJ > (int)CommonLevels) + break; updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) return nullptr; @@ -3759,8 +3777,8 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, assert(isLoadOrStore(Dst)); Value *SrcPtr = getPointerOperand(Src); Value *DstPtr = getPointerOperand(Dst); - assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == - AliasAnalysis::MustAlias); + assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, + SrcPtr) == AliasAnalysis::MustAlias); // establish loop nesting levels establishNestingLevels(Src, Dst); @@ -3775,9 +3793,9 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); - UsefulGEP = - isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && - isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && + (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); } unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector<Subscript, 4> Pair(Pairs); diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp new file mode 100644 index 0000000..e5ee295 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -0,0 +1,337 @@ +//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines divergence analysis which determines whether a branch in a +// GPU program is divergent. It can help branch optimizations such as jump +// threading and loop unswitching to make better decisions. +// +// GPU programs typically use the SIMD execution model, where multiple threads +// in the same execution group have to execute in lock-step. Therefore, if the +// code contains divergent branches (i.e., threads in a group do not agree on +// which path of the branch to take), the group of threads has to execute all +// the paths from that branch with different subsets of threads enabled until +// they converge at the immediately post-dominating BB of the paths. +// +// Due to this execution model, some optimizations such as jump +// threading and loop unswitching can be unfortunately harmful when performed on +// divergent branches. Therefore, an analysis that computes which branches in a +// GPU program are divergent can help the compiler to selectively run these +// optimizations. +// +// This file defines divergence analysis which computes a conservative but +// non-trivial approximation of all divergent branches in a GPU program. It +// partially implements the approach described in +// +// Divergence Analysis +// Sampaio, Souza, Collange, Pereira +// TOPLAS '13 +// +// The divergence analysis identifies the sources of divergence (e.g., special +// variables that hold the thread ID), and recursively marks variables that are +// data or sync dependent on a source of divergence as divergent. +// +// While data dependency is a well-known concept, the notion of sync dependency +// is worth more explanation. Sync dependence characterizes the control flow +// aspect of the propagation of branch divergence. For example, +// +// %cond = icmp slt i32 %tid, 10 +// br i1 %cond, label %then, label %else +// then: +// br label %merge +// else: +// br label %merge +// merge: +// %a = phi i32 [ 0, %then ], [ 1, %else ] +// +// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid +// because %tid is not on its use-def chains, %a is sync dependent on %tid +// because the branch "br i1 %cond" depends on %tid and affects which value %a +// is assigned to. +// +// The current implementation has the following limitations: +// 1. intra-procedural. It conservatively considers the arguments of a +// non-kernel-entry function and the return value of a function call as +// divergent. +// 2. memory as black box. It conservatively considers values loaded from +// generic or local address as divergent. This can be improved by leveraging +// pointer analysis. 
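+//
+// (Editorial illustration of limitation 1, not part of the original source:
+//  given
+//    %r = call i32 @some_extern_fn(i32 %x)
+//  the analysis marks %r divergent even when %x is uniform, because the
+//  callee is never examined; @some_extern_fn is a hypothetical name.)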
+//===----------------------------------------------------------------------===// + +#include <vector> +#include "llvm/IR/Dominators.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "divergence" + +namespace { +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.setPreservesAll(); + } + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet<const Value *> DivergentValues; +}; +} // End of anonymous namespace + +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + +namespace { + +class DivergencePropagator { +public: + DivergencePropagator(Function &F, TargetTransformInfo &TTI, + DominatorTree &DT, PostDominatorTree &PDT, + DenseSet<const Value *> &DV) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + void populateWithSourcesOfDivergence(); + void propagate(); + +private: + // A helper function that explores data dependents of V. + void exploreDataDependency(Value *V); + // A helper function that explores sync dependents of TI. + void exploreSyncDependency(TerminatorInst *TI); + // Computes the influence region from Start to End. This region includes all + // basic blocks on any path from Start to End. + void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion); + // Finds all users of I that are outside the influence region, and add these + // users to Worklist. + void findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); + + Function &F; + TargetTransformInfo &TTI; + DominatorTree &DT; + PostDominatorTree &PDT; + std::vector<Value *> Worklist; // Stack for DFS. + DenseSet<const Value *> &DV; // Stores all divergent values. 
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+  Worklist.clear();
+  DV.clear();
+  for (auto &I : inst_range(F)) {
+    if (TTI.isSourceOfDivergence(&I)) {
+      Worklist.push_back(&I);
+      DV.insert(&I);
+    }
+  }
+  for (auto &Arg : F.args()) {
+    if (TTI.isSourceOfDivergence(&Arg)) {
+      Worklist.push_back(&Arg);
+      DV.insert(&Arg);
+    }
+  }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+  // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+  // immediate post dominator are divergent. This rule handles if-then-else
+  // patterns. For example,
+  //
+  //   if (tid < 5)
+  //     a1 = 1;
+  //   else
+  //     a2 = 2;
+  //   a = phi(a1, a2); // sync dependent on (tid < 5)
+  BasicBlock *ThisBB = TI->getParent();
+  BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+  if (IPostDom == nullptr)
+    return;
+
+  for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+    // A PHINode is uniform if it returns the same value no matter which path
+    // is taken.
+    if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
+      Worklist.push_back(I);
+  }
+
+  // Propagation rule 2: if a value defined in a loop is used outside, the
+  // user is sync dependent on the condition of the loop exits that dominate
+  // the user. For example,
+  //
+  //   int i = 0;
+  //   do {
+  //     i++;
+  //     if (foo(i)) ...  // uniform
+  //   } while (i < tid);
+  //   if (bar(i)) ...    // divergent
+  //
+  // A program may contain unstructured loops. Therefore, we cannot leverage
+  // LoopInfo, which only recognizes natural loops.
+  //
+  // The algorithm used here handles both natural and unstructured loops. Given
+  // a branch TI, we first compute its influence region, the union of all
+  // simple paths from TI to its immediate post dominator (IPostDom). Then, we
+  // search for all the values defined in the influence region but used
+  // outside. All these users are sync dependent on TI.
+  DenseSet<BasicBlock *> InfluenceRegion;
+  computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+  // An insight that can speed up the search process is that all the in-region
+  // values that are used outside must dominate TI. Therefore, instead of
+  // searching every basic block in the influence region, we search all the
+  // dominators of TI until we leave the influence region.
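+  // (Editorial sketch, not from the original source: in the do-while example
+  //  above, the block defining i dominates the loop's exit branch TI, so the
+  //  upward walk over TI's dominators below finds i's definition without
+  //  scanning every block in the region.)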
+  BasicBlock *InfluencedBB = ThisBB;
+  while (InfluenceRegion.count(InfluencedBB)) {
+    for (auto &I : *InfluencedBB)
+      findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+    DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+    if (IDomNode == nullptr)
+      break;
+    InfluencedBB = IDomNode->getBlock();
+  }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+    Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+  for (User *U : I.users()) {
+    Instruction *UserInst = cast<Instruction>(U);
+    if (!InfluenceRegion.count(UserInst->getParent())) {
+      if (DV.insert(UserInst).second)
+        Worklist.push_back(UserInst);
+    }
+  }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+    BasicBlock *Start, BasicBlock *End,
+    DenseSet<BasicBlock *> &InfluenceRegion) {
+  assert(PDT.properlyDominates(End, Start) &&
+         "End does not properly dominate Start");
+  std::vector<BasicBlock *> InfluenceStack;
+  InfluenceStack.push_back(Start);
+  InfluenceRegion.insert(Start);
+  while (!InfluenceStack.empty()) {
+    BasicBlock *BB = InfluenceStack.back();
+    InfluenceStack.pop_back();
+    for (BasicBlock *Succ : successors(BB)) {
+      if (End != Succ && InfluenceRegion.insert(Succ).second)
+        InfluenceStack.push_back(Succ);
+    }
+  }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+  // Follow def-use chains of V.
+  for (User *U : V->users()) {
+    Instruction *UserInst = cast<Instruction>(U);
+    if (DV.insert(UserInst).second)
+      Worklist.push_back(UserInst);
+  }
+}
+
+void DivergencePropagator::propagate() {
+  // Traverse the dependency graph using DFS.
+  while (!Worklist.empty()) {
+    Value *V = Worklist.back();
+    Worklist.pop_back();
+    if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+      // Terminators with fewer than two successors won't introduce sync
+      // dependency. Ignore them.
+      if (TI->getNumSuccessors() > 1)
+        exploreSyncDependency(TI);
+    }
+    exploreDataDependency(V);
+  }
+}
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+  return new DivergenceAnalysis();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  if (TTIWP == nullptr)
+    return false;
+
+  TargetTransformInfo &TTI = TTIWP->getTTI(F);
+  // Fast path: if the target does not have branch divergence, we do not mark
+  // any branch as divergent.
+  if (!TTI.hasBranchDivergence())
+    return false;
+
+  DivergentValues.clear();
+  DivergencePropagator DP(F, TTI,
+                          getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+                          getAnalysis<PostDominatorTree>(), DivergentValues);
+  DP.populateWithSourcesOfDivergence();
+  DP.propagate();
+  return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+  if (DivergentValues.empty())
+    return;
+  const Value *FirstDivergentValue = *DivergentValues.begin();
+  const Function *F;
+  if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+    F = Arg->getParent();
+  } else if (const Instruction *I =
+                 dyn_cast<Instruction>(FirstDivergentValue)) {
+    F = I->getParent()->getParent();
+  } else {
+    llvm_unreachable("Only arguments and instructions can be divergent");
+  }
+
+  // Dumps all divergent values in F: arguments first, then instructions.
+  for (auto &Arg : F->args()) {
+    if (DivergentValues.count(&Arg))
+      OS << "DIVERGENT: " << Arg << "\n";
+  }
+  // Iterate instructions using inst_range to ensure a deterministic order.
+ for (auto &I : inst_range(F)) { + if (DivergentValues.count(&I)) + OS << "DIVERGENT:" << I << "\n"; + } +} diff --git a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp deleted file mode 100644 index a686bec..0000000 --- a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass wraps a TargetTransformInfo in a FunctionPass so that it can -// forward along the current Function so that we can make target specific -// decisions based on the particular subtarget specified for each Function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/FunctionTargetTransformInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "function-tti" -static const char ftti_name[] = "Function TargetTransformInfo"; -INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true) -INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true) -char FunctionTargetTransformInfo::ID = 0; - -namespace llvm { -FunctionPass *createFunctionTargetTransformInfoPass() { - return new FunctionTargetTransformInfo(); -} -} - -FunctionTargetTransformInfo::FunctionTargetTransformInfo() - : FunctionPass(ID), Fn(nullptr), TTI(nullptr) { - initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry()); -} - -void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<TargetTransformInfo>(); -} - -void FunctionTargetTransformInfo::releaseMemory() {} - -bool FunctionTargetTransformInfo::runOnFunction(Function &F) { - Fn = &F; - TTI = &getAnalysis<TargetTransformInfo>(); - return false; -} diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index ded1de7..65ba1c7 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -49,7 +49,7 @@ public: explicit CGPassManager() : ModulePass(ID), PMDataManager() { } - /// run - Execute all of the passes scheduled for execution. Keep track of + /// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool runOnModule(Module &M) override; @@ -142,9 +142,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, FPPassManager *FPP = (FPPassManager*)P; // Run pass P on all functions in the current SCC. 
-  for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
-       I != E; ++I) {
-    if (Function *F = (*I)->getFunction()) {
+  for (CallGraphNode *CGN : CurSCC) {
+    if (Function *F = CGN->getFunction()) {
       dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
       {
         TimeRegion PassTimer(getPassTimer(FPP));
@@ -165,7 +164,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
 }
 
-/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// Scan the functions in the specified SCC and resync the
 /// callgraph with the call sites found in it. This is used after
 /// FunctionPasses have potentially munged the callgraph, and can be used after
 /// CallGraphSCC passes to verify that they correctly updated the callgraph.
@@ -181,9 +180,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
 
   DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
                << " nodes:\n";
-        for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
-             I != E; ++I)
-          (*I)->dump();
+        for (CallGraphNode *CGN : CurSCC)
+          CGN->dump();
         );
 
   bool MadeChange = false;
@@ -214,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
               // list of the same call.
               CallSites.count(I->first) ||
 
-              // If the call edge is not from a call or invoke, then the function
-              // pass RAUW'd a call with another value. This can happen when
-              // constant folding happens of well known functions etc.
-              !CallSite(I->first)) {
+              // If the call edge is not from a call or invoke, or it is an
+              // intrinsic call, then the function pass RAUW'd a call with
+              // another value. This can happen when constant folding happens
+              // of well known functions etc.
+              !CallSite(I->first) ||
+              (CallSite(I->first).getCalledFunction() &&
+               CallSite(I->first).getCalledFunction()->isIntrinsic())) {
 
           assert(!CheckingMode &&
                  "CallGraphSCCPass did not update the CallGraph correctly!");
@@ -357,9 +358,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
 
   DEBUG(if (MadeChange) {
           dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
-          for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
-               I != E; ++I)
-            (*I)->dump();
+          for (CallGraphNode *CGN : CurSCC)
+            CGN->dump();
           if (DevirtualizedCall)
             dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
@@ -372,15 +372,15 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
   return DevirtualizedCall;
 }
 
-/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
-/// specified SCC.  This keeps track of whether a function pass devirtualizes
+/// Execute the body of the entire pass manager on the specified SCC.
+/// This keeps track of whether a function pass devirtualizes
 /// any calls and returns it in DevirtualizedCall.
 bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
                                       bool &DevirtualizedCall) {
   bool Changed = false;
 
-  // CallGraphUpToDate - Keep track of whether the callgraph is known to be
-  // up-to-date or not. The CGSSC pass manager runs two types of passes:
+  // Keep track of whether the callgraph is known to be up-to-date or not.
+  // The CGSCC pass manager runs two types of passes:
   // CallGraphSCC Passes and other random function passes. Because other
   // random function passes are not CallGraph aware, they may clobber the
   // call graph by introducing new calls or deleting other ones. This flag
@@ -433,7 +433,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
   return Changed;
 }
 
-/// run - Execute all of the passes scheduled for execution. Keep track of
+/// Execute all of the passes scheduled for execution. Keep track of
 /// whether any of the passes modifies the module, and if so, return true.
 bool CGPassManager::runOnModule(Module &M) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -519,7 +519,7 @@ bool CGPassManager::doFinalization(CallGraph &CG) {
 // CallGraphSCC Implementation
 //===----------------------------------------------------------------------===//
 
-/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// This informs the SCC and the pass manager that the specified
 /// Old node has been deleted, and New is to be used in its place.
 void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
   assert(Old != New && "Should not replace node with self");
@@ -578,8 +578,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
     CGP->add(this);
 }
 
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph.  If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
 /// always explicitly call the implementation here.
 void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<CallGraphWrapperPass>();
@@ -609,9 +609,9 @@ namespace {
     bool runOnSCC(CallGraphSCC &SCC) override {
       Out << Banner;
-      for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
-        if ((*I)->getFunction())
-          (*I)->getFunction()->print(Out);
+      for (CallGraphNode *CGN : SCC) {
+        if (CGN->getFunction())
+          CGN->getFunction()->print(Out);
         else
           Out << "\nPrinting <null> Function\n";
       }
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 607c068..018ae99 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -96,7 +96,7 @@ namespace {
     }
 
     bool runOnModule(Module &M) override {
-      InitializeAliasAnalysis(this);
+      InitializeAliasAnalysis(this, &M.getDataLayout());
 
       // Find non-addr taken globals.
       AnalyzeGlobals(M);
@@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
     } else if (Operator::getOpcode(I) == Instruction::BitCast) {
       if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
         return true;
-    } else if (CallSite CS = I) {
+    } else if (auto CS = CallSite(I)) {
       // Make sure that this is just the function being called, not that it is
       // being passed into the function.
       if (!CS.isCallee(&U)) {
@@ -322,7 +322,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
       continue;
 
     // Check the value being stored.
-    Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+    Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+                                     GV->getParent()->getDataLayout());
 
     if (!isAllocLikeFn(Ptr, TLI))
       return false;  // Too hard to analyze.
@@ -481,8 +482,8 @@ AliasAnalysis::AliasResult
 GlobalsModRef::alias(const Location &LocA,
                      const Location &LocB) {
   // Get the base object these pointers point to.
-  const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
-  const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
+  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
 
   // If either of the underlying values is a global, they may be non-addr-taken
   // globals, which we can answer queries about.
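[Editorial note] A recurring mechanical change in these hunks is that GetUnderlyingObject (declared in llvm/Analysis/ValueTracking.h) now takes a DataLayout reference obtained from the enclosing Module, rather than consulting an optional pointer. A minimal caller sketch; baseObjectOf is a hypothetical helper, not part of this patch:

    // The DataLayout is fetched from the Module and passed by reference, so
    // callers no longer have a "no DataLayout" case to handle.
    static const Value *baseObjectOf(const Value *Ptr, const Function &F) {
      const DataLayout &DL = F.getParent()->getDataLayout();
      return GetUnderlyingObject(Ptr, DL);
    }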
@@ -540,8 +541,9 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. + const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); if (const GlobalValue *GV = - dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr))) + dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) if (GV->hasLocalLinkage()) if (const Function *F = CS.getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp index 86e7fc2..2bd959d 100644 --- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -45,9 +45,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; - // DataLayout if available, or null. - const DataLayout *const DL; - /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; @@ -67,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ContainsNoDuplicateCall; bool HasReturn; bool HasIndirectBr; + bool HasFrameEscape; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -145,18 +143,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, - AssumptionCacheTracker *ACT, Function &Callee, int Threshold) - : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), + CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, + Function &Callee, int Threshold) + : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -244,10 +242,8 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. 
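/// For example (illustrative, not from the patch), for
/// "getelementptr inbounds i32, i32* %p, i64 4" the accumulated offset is
/// 16 bytes, assuming the usual 4-byte allocation size for i32.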
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
-  if (!DL)
-    return false;
-
-  unsigned IntPtrWidth = DL->getPointerSizeInBits();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  unsigned IntPtrWidth = DL.getPointerSizeInBits();
   assert(IntPtrWidth == Offset.getBitWidth());
 
   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -263,12 +259,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
     // Handle a struct index, which adds its field offset to the pointer.
     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
       unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = DL->getStructLayout(STy);
+      const StructLayout *SL = DL.getStructLayout(STy);
       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
       continue;
     }
 
-    APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
+    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
   return true;
@@ -289,9 +285,9 @@
 
   // Accumulate the allocated size.
   if (I.isStaticAlloca()) {
+    const DataLayout &DL = F.getParent()->getDataLayout();
     Type *Ty = I.getAllocatedType();
-    AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
-                      Ty->getPrimitiveSizeInBits());
+    AllocatedSize += DL.getTypeAllocSize(Ty);
   }
 
   // We will happily inline static alloca instructions.
@@ -327,7 +323,7 @@
 
   // Try to fold GEPs of constant-offset call site argument pointers. This
   // requires target data and inbounds GEPs.
-  if (DL && I.isInBounds()) {
+  if (I.isInBounds()) {
     // Check if we have a base + offset for the pointer.
     Value *Ptr = I.getPointerOperand();
     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
@@ -396,7 +392,6 @@
 }
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
-  const DataLayout *DL = I.getDataLayout();
   // Propagate constants through ptrtoint.
   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
   if (!COp)
@@ -410,7 +405,8 @@
   // Track base/offset pairs when converted to a plain integer provided the
   // integer is large enough to represent the pointer.
   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
-  if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  if (IntegerSize >= DL.getPointerSizeInBits()) {
     std::pair<Value *, APInt> BaseAndOffset
       = ConstantOffsetPtrs.lookup(I.getOperand(0));
     if (BaseAndOffset.first)
@@ -433,7 +429,6 @@
 }
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
-  const DataLayout *DL = I.getDataLayout();
   // Propagate constants through inttoptr.
   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
   if (!COp)
@@ -448,7 +443,8 @@
   // modifications provided the integer is not too large.
Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (DL && IntegerSize <= DL->getPointerSizeInBits()) { + const DataLayout &DL = F.getParent()->getDataLayout(); + if (IntegerSize <= DL.getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -485,12 +481,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Constant *COp = dyn_cast<Constant>(Operand); if (!COp) COp = SimplifiedValues.lookup(Operand); - if (COp) + if (COp) { + const DataLayout &DL = F.getParent()->getDataLayout(); if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), COp, DL)) { SimplifiedValues[&I] = C; return true; } + } // Disable any SROA on the argument to arbitrary unary operators. disableSROA(Operand); @@ -595,13 +593,20 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + const DataLayout &DL = F.getParent()->getDataLayout(); if (!isa<Constant>(LHS)) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + Value *SimpleV = nullptr; + if (auto FI = dyn_cast<FPMathOperator>(&I)) + SimpleV = + SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + else + SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { SimplifiedValues[&I] = C; return true; @@ -617,7 +622,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { bool CallAnalyzer::visitLoad(LoadInst &I) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { if (I.isSimple()) { accumulateSROACost(CostIt, InlineConstants::InstrCost); return true; @@ -632,7 +637,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { bool CallAnalyzer::visitStore(StoreInst &I) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { if (I.isSimple()) { accumulateSROACost(CostIt, InlineConstants::InstrCost); return true; @@ -713,8 +718,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.hasFnAttr(Attribute::ReturnsTwice) && - !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice)) { + !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; @@ -740,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. return false; + case Intrinsic::frameescape: + HasFrameEscape = true; + return false; } } @@ -783,7 +790,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. 
-  CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
+  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // bonus we want to apply, but don't go below zero.
@@ -907,6 +914,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
       ++NumVectorInstructions;
 
+    // If the instruction is floating point, and the target says this operation
+    // is expensive or the function has the "use-soft-float" attribute, this may
+    // eventually become a library call. Treat the cost as such.
+    if (I->getType()->isFloatingPointTy()) {
+      bool hasSoftFloatAttr = false;
+
+      // If the function has the "use-soft-float" attribute, mark it as
+      // expensive.
+      if (F.hasFnAttribute("use-soft-float")) {
+        Attribute Attr = F.getFnAttribute("use-soft-float");
+        StringRef Val = Attr.getValueAsString();
+        if (Val == "true")
+          hasSoftFloatAttr = true;
+      }
+
+      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+          hasSoftFloatAttr)
+        Cost += InlineConstants::CallPenalty;
+    }
+
     // If the instruction simplified to a constant, there is no cost to this
     // instruction. Visit the instructions using our InstVisitor to account for
     // all of the per-instruction logic. The visit tree returns true if we
@@ -919,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
 
     // If visiting this instruction detected an uninlinable pattern, abort.
     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
-        HasIndirectBr)
+        HasIndirectBr || HasFrameEscape)
       return false;
 
     // If the caller is a recursive function then we don't want to inline
@@ -929,16 +955,9 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
       return false;
 
-    if (NumVectorInstructions > NumInstructions/2)
-      VectorBonus = FiftyPercentVectorBonus;
-    else if (NumVectorInstructions > NumInstructions/10)
-      VectorBonus = TenPercentVectorBonus;
-    else
-      VectorBonus = 0;
-
-    // Check if we've past the threshold so we don't spin in huge basic
-    // blocks that will never inline.
-    if (Cost > (Threshold + VectorBonus))
+    // Check if we've passed the maximum possible threshold so we don't spin in
+    // huge basic blocks that will never inline.
+    if (Cost > Threshold)
      return false;
   }
 
@@ -952,10 +971,11 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
 /// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
-  if (!DL || !V->getType()->isPointerTy())
+  if (!V->getType()->isPointerTy())
     return nullptr;
 
-  unsigned IntPtrWidth = DL->getPointerSizeInBits();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  unsigned IntPtrWidth = DL.getPointerSizeInBits();
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -979,7 +999,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
   } while (Visited.insert(V).second);
 
-  Type *IntPtrTy = DL->getIntPtrType(V->getContext());
+  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
 }
 
@@ -993,33 +1013,42 @@
 bool CallAnalyzer::analyzeCall(CallSite CS) {
   ++NumCallsAnalyzed;
 
-  // Track whether the post-inlining function would have more than one basic
-  // block. A single basic block is often intended for inlining. Balloon the
-  // threshold by 50% until we pass the single-BB phase.
-  bool SingleBB = true;
-  int SingleBBBonus = Threshold / 2;
-  Threshold += SingleBBBonus;
-
   // Perform some tweaks to the cost and threshold based on the direct
   // callsite information.
 
   // We want to more aggressively inline vector-dense kernels, so up the
   // threshold, and we'll lower it if the % of vector instructions gets too
-  // low.
+  // low. Note that these bonuses are somewhat arbitrary and evolved over time
+  // by accident as much as because they are principled bonuses.
+  //
+  // FIXME: It would be nice to remove all such bonuses. At least it would be
+  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
-  FiftyPercentVectorBonus = Threshold;
-  TenPercentVectorBonus = Threshold / 2;
+  FiftyPercentVectorBonus = 3 * Threshold / 2;
+  TenPercentVectorBonus = 3 * Threshold / 4;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  // Track whether the post-inlining function would have more than one basic
+  // block. A single basic block is often intended for inlining. Balloon the
+  // threshold by 50% until we pass the single-BB phase.
+  bool SingleBB = true;
+  int SingleBBBonus = Threshold / 2;
+
+  // Speculatively apply all possible bonuses to Threshold. If Cost exceeds
+  // this Threshold at any time (and Cost can never decrease), we can stop
+  // processing the rest of the function body.
+  Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
 
   // Give out bonuses per argument, as the instructions setting them up will
   // be gone after inlining.
   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-    if (DL && CS.isByValArgument(I)) {
+    if (CS.isByValArgument(I)) {
       // We approximate the number of loads and stores needed by dividing the
       // size of the byval type by the target's pointer size.
       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
-      unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
-      unsigned PointerSize = DL->getPointerSizeInBits();
+      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = DL.getPointerSizeInBits();
       // Ceiling division.
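      // (For example, a 96-bit byval type with 64-bit pointers needs
      // (96 + 64 - 1) / 64 = 2 word-sized stores; truncating division
      // would under-count it as 1.)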
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
@@ -1053,9 +1082,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   Instruction *Instr = CS.getInstruction();
   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-      Threshold = 1;
+      Threshold = 0;
   } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
-    Threshold = 1;
+    Threshold = 0;
 
   // If this function uses the coldcc calling convention, prefer not to inline
   // it.
@@ -1127,7 +1156,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
     // Bail out the moment we cross the threshold. This means we'll under-count
     // the cost, but only when undercounting doesn't matter.
-    if (Cost > (Threshold + VectorBonus))
+    if (Cost > Threshold)
       break;
 
     BasicBlock *BB = BBWorklist[Idx];
@@ -1147,7 +1176,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     // returns false, and we can bail out.
     if (!analyzeBlock(BB, EphValues)) {
       if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
-          HasIndirectBr)
+          HasIndirectBr || HasFrameEscape)
        return false;
 
       // If the caller is a recursive function then we don't want to inline
@@ -1205,7 +1234,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
     return false;
 
-  Threshold += VectorBonus;
+  // We applied the maximum possible vector bonus at the beginning. Now,
+  // subtract the excess bonus, if any, from the Threshold before
+  // comparing against Cost.
+  if (NumVectorInstructions <= NumInstructions / 10)
+    Threshold -= FiftyPercentVectorBonus;
+  else if (NumVectorInstructions <= NumInstructions / 2)
+    Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
 
   return Cost < Threshold;
 }
@@ -1220,19 +1255,19 @@ void CallAnalyzer::dump() {
   DEBUG_PRINT_STAT(NumConstantPtrCmps);
   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
   DEBUG_PRINT_STAT(NumInstructionsSimplified);
+  DEBUG_PRINT_STAT(NumInstructions);
   DEBUG_PRINT_STAT(SROACostSavings);
   DEBUG_PRINT_STAT(SROACostSavingsLost);
   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
   DEBUG_PRINT_STAT(Cost);
   DEBUG_PRINT_STAT(Threshold);
-  DEBUG_PRINT_STAT(VectorBonus);
 #undef DEBUG_PRINT_STAT
 }
 #endif
 
 INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                       true, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                     true, true)
@@ -1246,12 +1281,12 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
 void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<AssumptionCacheTracker>();
-  AU.addRequired<TargetTransformInfo>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   CallGraphSCCPass::getAnalysisUsage(AU);
 }
 
 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
-  TTI = &getAnalysis<TargetTransformInfo>();
+  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
   ACT = &getAnalysis<AssumptionCacheTracker>();
   return false;
 }
@@ -1262,16 +1297,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
 
 /// \brief Test that two functions either both have or both lack the given
 /// attribute.
-static bool attributeMatches(Function *F1, Function *F2, - Attribute::AttrKind Attr) { - return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +template<typename AttrKind> +static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { + return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); } /// \brief Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee) { - return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + return attributeMatches(Caller, Callee, "target-cpu") && + attributeMatches(Caller, Callee, "target-features") && + attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && attributeMatches(Caller, Callee, Attribute::SanitizeThread); } @@ -1309,8 +1346,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(Callee->getDataLayout(), *TTI, - ACT, *Callee, Threshold); + CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1325,9 +1361,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, } bool InlineCostAnalysis::isInlineViable(Function &F) { - bool ReturnsTwice = - F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); + bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches or // blockaddresses. @@ -1349,6 +1383,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (!ReturnsTwice && CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; + + // Disallow inlining functions that call @llvm.frameescape. Doing this + // correctly would require major changes to the inliner. + if (CS.getCalledFunction() && + CS.getCalledFunction()->getIntrinsicID() == + llvm::Intrinsic::frameescape) + return false; } } diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index 6b5f370..b88b249 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -33,7 +34,7 @@ using namespace llvm; char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(IVUsers, "iv-users", @@ -113,6 +114,8 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, /// return true. Otherwise, return false. bool IVUsers::AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests) { + const DataLayout &DL = I->getModule()->getDataLayout(); + // Add this IV user to the Processed set before returning false to ensure that // all IV users are members of the set. See IVUsers::isIVUserOrOperand. 
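  // (SmallPtrSet::insert returns an {iterator, inserted} pair, so .second is
  // false when I was already present, i.e. already processed.)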
if (!Processed.insert(I).second) @@ -124,14 +127,14 @@ bool IVUsers::AddUsersImpl(Instruction *I, // IVUsers is used by LSR which assumes that all SCEV expressions are safe to // pass to SCEVExpander. Expressions are not safe to expand if they represent // operations that are not safe to speculate, namely integer division. - if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL)) + if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I)) return false; // LSR is not APInt clean, do not touch integers bigger than 64-bits. // Also avoid creating IVs of non-native types. For example, we don't want a // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. uint64_t Width = SE->getTypeSizeInBits(I->getType()); - if (Width > 64 || (DL && !DL->isLegalInteger(Width))) + if (Width > 64 || !DL.isLegalInteger(Width)) return false; // Get the symbolic expression for this instruction. @@ -241,7 +244,7 @@ IVUsers::IVUsers() } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); AU.setPreservesAll(); @@ -250,11 +253,9 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 3fbbd7c..097b99e 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -45,13 +45,13 @@ STATISTIC(NumReassoc, "Number of reassociations"); namespace { struct Query { - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; AssumptionCache *AC; const Instruction *CxtI; - Query(const DataLayout *DL, const TargetLibraryInfo *tli, + Query(const DataLayout &DL, const TargetLibraryInfo *tli, const DominatorTree *dt, AssumptionCache *ac = nullptr, const Instruction *cxti = nullptr) : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} @@ -61,6 +61,8 @@ struct Query { static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, unsigned); +static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const Query &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, unsigned); static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); @@ -467,8 +469,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; - for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = PI->getIncomingValue(i); + for (Value *Incoming : PI->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = PI == LHS ? 
@@ -508,8 +509,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
 
   // Evaluate the comparison on the incoming phi values.
   Value *CommonValue = nullptr;
-  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
-    Value *Incoming = PI->getIncomingValue(i);
+  for (Value *Incoming : PI->incoming_values()) {
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
     Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
@@ -582,7 +582,7 @@
 }
 
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout *DL, const TargetLibraryInfo *TLI,
+                             const DataLayout &DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT, AssumptionCache *AC,
                              const Instruction *CxtI) {
   return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -599,17 +599,11 @@
 /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
 /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
 /// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
-                                                Value *&V,
+static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
                                                 bool AllowNonInbounds = false) {
   assert(V->getType()->getScalarType()->isPointerTy());
 
-  // Without DataLayout, just be conservative for now. Theoretically, more could
-  // be done in this case.
-  if (!DL)
-    return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
-
-  Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType();
+  Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
   APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -619,7 +613,7 @@
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if ((!AllowNonInbounds && !GEP->isInBounds()) ||
-          !GEP->accumulateConstantOffset(*DL, Offset))
+          !GEP->accumulateConstantOffset(DL, Offset))
         break;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -644,8 +638,8 @@
 
 /// \brief Compute the constant difference between two pointer values.
 /// If the difference is not a constant, returns zero.
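/// For example (illustrative), if %q is
/// "getelementptr inbounds i8, i8* %p, i64 8", the difference between %q and
/// %p is the constant 8.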
-static Constant *computePointerDifference(const DataLayout *DL, - Value *LHS, Value *RHS) { +static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, + Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); @@ -781,7 +775,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), @@ -960,7 +954,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -969,7 +963,7 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -978,7 +972,7 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -986,7 +980,7 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1090,7 +1084,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1108,7 +1102,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1116,8 +1110,8 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, RecursionLimit); } -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, - unsigned) { +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned) { // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1126,14 +1120,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, if (match(Op1, m_Undef())) return Op1; + // 0 / X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). 
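+  // For example, "fdiv nnan nsz float 0.0, %x" folds to 0.0: without nnan,
+  // %x could itself be 0.0 (making the result NaN), and without nsz a
+  // negative %x would make the exactly-rounded result -0.0.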
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + return nullptr; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1208,7 +1209,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1226,7 +1227,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1234,8 +1235,8 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, - unsigned) { +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &, unsigned) { // undef % X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1244,14 +1245,21 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, if (match(Op1, m_Undef())) return Op1; + // 0 % X -> 0 + // Requires that NaNs are off (X could be zero) and signed zeroes are + // ignored (X could be positive or negative, so the output sign is unknown). 
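+  // As with fdiv above: "frem nnan nsz float 0.0, %x" folds to 0.0, since
+  // without nnan %x could be zero, making the result NaN.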
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero())) + return Op0; + return nullptr; } -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -1371,7 +1379,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), @@ -1395,7 +1403,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1429,7 +1437,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1580,9 +1588,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op0; - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, + Q.DT)) return Op1; } @@ -1627,7 +1637,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1815,7 +1825,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1872,7 +1882,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -1932,10 +1942,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. 
-static Constant *computePointerICmp(const DataLayout *DL, +static Constant *computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, - CmpInst::Predicate Pred, - Value *LHS, Value *RHS) { + CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { // First, skip past any trivial no-ops. LHS = LHS->stripPointerCasts(); RHS = RHS->stripPointerCasts(); @@ -2353,8 +2363,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. - if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) && - Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && isa<PtrToIntInst>(LI) && + Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, @@ -2966,10 +2976,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); - Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + Constant *NewLHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesLHS); SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); - Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + Constant *NewRHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesRHS); return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); } } @@ -3008,7 +3020,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, Instruction *CxtI) { @@ -3038,8 +3050,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(GetCompareTy(LHS), 1); - if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef - return UndefValue::get(GetCompareTy(LHS)); + // fcmp pred x, undef and fcmp pred undef, x + // fold to true if unordered, false if ordered + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { + // Choosing NaN for the undef will always make unordered comparison succeed + // and ordered comparison fail. + return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred)); + } // fcmp x,x -> true/false. Not all compares are foldable. if (LHS == RHS) { @@ -3050,44 +3067,57 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // Handle fcmp with constant RHS - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { // If the constant is a nan, see if we can fold the comparison based on it. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->getValueAPF().isNaN()) { - if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. 
+      return ConstantInt::getTrue(CFP->getContext());
+    }
+    // Check whether the constant is an infinity.
+    if (CFP->getValueAPF().isInfinity()) {
+      if (CFP->getValueAPF().isNegative()) {
+        switch (Pred) {
+        case FCmpInst::FCMP_OLT:
+          // No value is ordered and less than negative infinity.
+          return ConstantInt::getFalse(CFP->getContext());
-        assert(FCmpInst::isUnordered(Pred) &&
-               "Comparison must be either ordered or unordered!");
-        // True if unordered.
-        return ConstantInt::getTrue(CFP->getContext());
-      }
-      // Check whether the constant is an infinity.
-      if (CFP->getValueAPF().isInfinity()) {
-        if (CFP->getValueAPF().isNegative()) {
-          switch (Pred) {
-          case FCmpInst::FCMP_OLT:
-            // No value is ordered and less than negative infinity.
-            return ConstantInt::getFalse(CFP->getContext());
-          case FCmpInst::FCMP_UGE:
-            // All values are unordered with or at least negative infinity.
-            return ConstantInt::getTrue(CFP->getContext());
-          default:
-            break;
-          }
-        } else {
-          switch (Pred) {
-          case FCmpInst::FCMP_OGT:
-            // No value is ordered and greater than infinity.
-            return ConstantInt::getFalse(CFP->getContext());
-          case FCmpInst::FCMP_ULE:
-            // All values are unordered with and at most infinity.
-            return ConstantInt::getTrue(CFP->getContext());
-          default:
-            break;
-          }
+        case FCmpInst::FCMP_UGE:
+          // All values are unordered with or at least negative infinity.
+          return ConstantInt::getTrue(CFP->getContext());
+        default:
+          break;
+        }
+      } else {
+        switch (Pred) {
+        case FCmpInst::FCMP_OGT:
+          // No value is ordered and greater than infinity.
+          return ConstantInt::getFalse(CFP->getContext());
+        case FCmpInst::FCMP_ULE:
+          // All values are unordered with and at most infinity.
+          return ConstantInt::getTrue(CFP->getContext());
+        default:
+          break;
           }
         }
       }
+    if (CFP->getValueAPF().isZero()) {
+      switch (Pred) {
+      case FCmpInst::FCMP_UGE:
+        if (CannotBeOrderedLessThanZero(LHS))
+          return ConstantInt::getTrue(CFP->getContext());
+        break;
+      case FCmpInst::FCMP_OLT:
+        // X < 0
+        if (CannotBeOrderedLessThanZero(LHS))
+          return ConstantInt::getFalse(CFP->getContext());
+        break;
+      default:
+        break;
+      }
+    }
   }
 
   // If the comparison is with the result of a select instruction, check whether
@@ -3106,7 +3136,7 @@
 }
 
 Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const DataLayout *DL,
+                              const DataLayout &DL,
                               const TargetLibraryInfo *TLI,
                              const DominatorTree *DT, AssumptionCache *AC,
                               const Instruction *CxtI) {
@@ -3201,7 +3231,7 @@
 }
 
 Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
-                                const DataLayout *DL,
+                                const DataLayout &DL,
                                 const TargetLibraryInfo *TLI,
                                 const DominatorTree *DT, AssumptionCache *AC,
                                 const Instruction *CxtI) {
@@ -3211,17 +3241,18 @@
 
 /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
 /// fold the result. If not, this returns null.
-static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+                              const Query &Q, unsigned) {
   // The type of the GEP pointer operand.
-  PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
-  unsigned AS = PtrTy->getAddressSpace();
+  unsigned AS =
+      cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
 
   // getelementptr P -> P.
if (Ops.size() == 1) return Ops[0]; // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); @@ -3234,11 +3265,11 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (match(Ops[1], m_Zero())) return Ops[0]; - Type *Ty = PtrTy->getElementType(); - if (Q.DL && Ty->isSized()) { + Type *Ty = SrcTy; + if (Ty->isSized()) { Value *P; uint64_t C; - uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty); + uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty); // getelementptr P, N -> P if P points to a type of zero size. if (TyAllocSize == 0) return Ops[0]; @@ -3246,7 +3277,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL->getPointerSizeInBits(AS)) { + Q.DL.getPointerSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -3288,14 +3319,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (!isa<Constant>(Ops[i])) return nullptr; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), + Ops.slice(1)); } -Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL, +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst( + cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(), + Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we @@ -3328,7 +3362,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, } Value *llvm::SimplifyInsertValueInst( - Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL, + Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), @@ -3341,8 +3375,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // with the common value. Value *CommonValue = nullptr; bool HasUndefInput = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = PN->getIncomingValue(i); + for (Value *Incoming : PN->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. 
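// For example:
//   %p = phi i32 [ %x, %bb0 ], [ %p, %bb1 ], [ undef, %bb2 ]
// may simplify to %x: the self-reference is skipped and the undef input can
// be assumed equal to the common value (subject to a further dominance
// check in the rest of this function, not shown in this hunk).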
if (Incoming == PN) continue; if (isa<UndefValue>(Incoming)) { @@ -3376,7 +3409,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { return nullptr; } -Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { @@ -3408,10 +3441,12 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: + return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: + return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); @@ -3451,14 +3486,42 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } +/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. +/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. +static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const Query &Q, + unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); + default: + return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); + } +} + Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } +Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const DataLayout &DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. 
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3469,7 +3532,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), @@ -3493,14 +3556,53 @@ static bool IsIdempotent(Intrinsic::ID ID) { } template <typename IterTy> -static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, +static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { + Intrinsic::ID IID = F->getIntrinsicID(); + unsigned NumOperands = std::distance(ArgBegin, ArgEnd); + Type *ReturnType = F->getReturnType(); + + // Binary Ops + if (NumOperands == 2) { + Value *LHS = *ArgBegin; + Value *RHS = *(ArgBegin + 1); + if (IID == Intrinsic::usub_with_overflow || + IID == Intrinsic::ssub_with_overflow) { + // X - X -> { 0, false } + if (LHS == RHS) + return Constant::getNullValue(ReturnType); + + // X - undef -> undef + // undef - X -> undef + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) + return UndefValue::get(ReturnType); + } + + if (IID == Intrinsic::uadd_with_overflow || + IID == Intrinsic::sadd_with_overflow) { + // X + undef -> undef + if (isa<UndefValue>(RHS)) + return UndefValue::get(ReturnType); + } + + if (IID == Intrinsic::umul_with_overflow || + IID == Intrinsic::smul_with_overflow) { + // X * 0 -> { 0, false } + if (match(RHS, m_Zero())) + return Constant::getNullValue(ReturnType); + + // X * undef -> { 0, false } + if (match(RHS, m_Undef())) + return Constant::getNullValue(ReturnType); + } + } + // Perform idempotent optimizations if (!IsIdempotent(IID)) return nullptr; // Unary Ops - if (std::distance(ArgBegin, ArgEnd) == 1) + if (NumOperands == 1) if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) if (II->getIntrinsicID() == IID) return II; @@ -3524,9 +3626,8 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (!F) return nullptr; - if (unsigned IID = F->getIntrinsicID()) - if (Value *Ret = - SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + if (F->isIntrinsic()) + if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) return Ret; if (!canConstantFoldCallTo(F)) @@ -3545,7 +3646,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, } Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout *DL, + User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), @@ -3553,7 +3654,7 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, } Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { return ::SimplifyCall(V, Args.begin(), Args.end(), @@ -3562,7 +3663,7 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
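// The overflow-intrinsic folds above, spelled out on i32:
//   call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %x)
//     -> { i32 0, i1 false }
//   call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 0)
//     -> { i32 0, i1 false }
//   call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 undef)
//     -> undef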
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, +Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { Value *Result; @@ -3608,8 +3709,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, AC, I); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::SRem: Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, @@ -3620,8 +3721,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, AC, I); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), @@ -3710,12 +3811,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { bool Simplified = false; SmallSetVector<Instruction *, 8> Worklist; + const DataLayout &DL = I->getModule()->getDataLayout(); // If we have an explicit value to collapse to, do that round of the // simplification loop by hand initially. @@ -3763,19 +3864,18 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, return Simplified; } -bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, +bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp new file mode 100644 index 0000000..9f1edd2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp @@ -0,0 +1,95 @@ +//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \brief Compute iterated dominance frontiers using a linear time algorithm. 
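/// A sketch of the intended use, in the style of mem2reg PHI placement; the
/// setDefiningBlocks/setLiveInBlocks setters are assumed from the
/// accompanying header, which is not part of this file:
///
///   IDFCalculator IDF(DT);
///   IDF.setDefiningBlocks(DefBlocks);   // blocks containing definitions
///   IDF.setLiveInBlocks(LiveInBlocks);  // optional pruning to live blocks
///   SmallVector<BasicBlock *, 32> PHIBlocks;
///   IDF.calculate(PHIBlocks);           // blocks that need PHI nodes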
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include <queue> + +using namespace llvm; + +void IDFCalculator::calculate(SmallVectorImpl<BasicBlock *> &PHIBlocks) { + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode()); + DFI != DFE; ++DFI) { + DomLevels[*DFI] = DFI.getPathLength() - 1; + } + } + + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. + typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; + typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + less_second> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (BasicBlock *BB : *DefBlocks) { + if (DomTreeNode *Node = DT.getNode(BB)) + PQ.push(std::make_pair(Node, DomLevels.lookup(Node))); + } + + SmallVector<DomTreeNode *, 32> Worklist; + SmallPtrSet<DomTreeNode *, 32> VisitedPQ; + SmallPtrSet<DomTreeNode *, 32> VisitedWorklist; + + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + VisitedWorklist.insert(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (auto Succ : successors(BB)) { + DomTreeNode *SuccNode = DT.getNode(Succ); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels.lookup(SuccNode); + if (SuccLevel > RootLevel) + continue; + + if (!VisitedPQ.insert(SuccNode).second) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (useLiveIn && !LiveInBlocks->count(SuccBB)) + continue; + + PHIBlocks.emplace_back(SuccBB); + if (!DefBlocks->count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (auto DomChild : *Node) { + if (VisitedWorklist.insert(DomChild).second) + Worklist.push_back(DomChild); + } + } + } +} diff --git a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp b/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp deleted file mode 100644 index 7aae2a5..0000000 --- a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Information about jump-instruction tables that have been created by -/// JumpInstrTables pass. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jiti" - -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/MathExtras.h" - -using namespace llvm; - -INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info", - "Jump-Instruction Table Info", true, true) -char JumpInstrTableInfo::ID = 0; - -ImmutablePass *llvm::createJumpInstrTableInfoPass() { - return new JumpInstrTableInfo(); -} - -ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) { - // This cast is always safe, since Bound is always in a subset of uint64_t. - uint64_t B = static_cast<uint64_t>(Bound); - return new JumpInstrTableInfo(B); -} - -JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign) - : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) { - if (!llvm::isPowerOf2_64(ByteAlign)) { - // Note that we don't explicitly handle overflow here, since we handle the 0 - // case explicitly when a caller actually tries to create jumptable entries, - // and this is the return value on overflow. - ByteAlignment = llvm::NextPowerOf2(ByteAlign); - } - - initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTableInfo::~JumpInstrTableInfo() {} - -void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target, - Function *Jump) { - Tables[TableFunTy].push_back(JumpPair(Target, Jump)); -} diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 4de56f1..e6f586a 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" @@ -29,7 +30,6 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include <map> #include <stack> using namespace llvm; @@ -41,7 +41,7 @@ char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -191,7 +191,7 @@ public: /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. - bool mergeIn(const LVILatticeVal &RHS) { + bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { if (RHS.isUndefined() || isOverdefined()) return false; if (RHS.isOverdefined()) return markOverdefined(); @@ -215,11 +215,9 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. 
- if (ConstantInt *Res = dyn_cast<ConstantInt>( - ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, - getConstant(), - RHS.getNotConstant()))) + if (ConstantInt *Res = + dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( + CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL))) if (Res->isOne()) return markNotConstant(RHS.getNotConstant()); @@ -241,11 +239,9 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. - if (ConstantInt *Res = dyn_cast<ConstantInt>( - ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, - getNotConstant(), - RHS.getConstant()))) + if (ConstantInt *Res = + dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands( + CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL))) if (Res->isOne()) return false; @@ -346,21 +342,17 @@ namespace { /// Push BV onto BlockValueStack unless it's already in there. /// Returns true on success. bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) { - if (BlockValueSet.count(BV)) + if (!BlockValueSet.insert(BV).second) return false; // It's already in the stack. BlockValueStack.push(BV); - BlockValueSet.insert(BV); return true; } - /// A pointer to the cache of @llvm.assume calls. - AssumptionCache *AC; - /// An optional DL pointer. - const DataLayout *DL; - /// An optional DT pointer. - DominatorTree *DT; - + AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls. + const DataLayout &DL; ///< A mandatory DataLayout + DominatorTree *DT; ///< An optional DT pointer. + friend struct LVIValueHandle; void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { @@ -426,7 +418,7 @@ namespace { OverDefinedCache.clear(); } - LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr, + LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL, DominatorTree *DT = nullptr) : AC(AC), DL(DL), DT(DT) {} }; @@ -579,11 +571,13 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (LoadInst *L = dyn_cast<LoadInst>(I)) { return L->getPointerAddressSpace() == 0 && - GetUnderlyingObject(L->getPointerOperand()) == Ptr; + GetUnderlyingObject(L->getPointerOperand(), + L->getModule()->getDataLayout()) == Ptr; } if (StoreInst *S = dyn_cast<StoreInst>(I)) { return S->getPointerAddressSpace() == 0 && - GetUnderlyingObject(S->getPointerOperand()) == Ptr; + GetUnderlyingObject(S->getPointerOperand(), + S->getModule()->getDataLayout()) == Ptr; } if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (MI->isVolatile()) return false; @@ -593,11 +587,13 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (!Len || Len->isZero()) return false; if (MI->getDestAddressSpace() == 0) - if (GetUnderlyingObject(MI->getRawDest()) == Ptr) + if (GetUnderlyingObject(MI->getRawDest(), + MI->getModule()->getDataLayout()) == Ptr) return true; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) if (MTI->getSourceAddressSpace() == 0) - if (GetUnderlyingObject(MTI->getRawSource()) == Ptr) + if (GetUnderlyingObject(MTI->getRawSource(), + MTI->getModule()->getDataLayout()) == Ptr) return true; } return false; @@ -614,10 +610,11 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (isKnownNonNull(Val)) { NotNull = true; } else { - Value *UnderlyingVal = GetUnderlyingObject(Val); + const DataLayout &DL = BB->getModule()->getDataLayout(); + Value 
*UnderlyingVal = GetUnderlyingObject(Val, DL); // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. - if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) { for (Instruction &I : *BB) { if (InstructionDereferencesPointer(&I, UnderlyingVal)) { NotNull = true; @@ -651,7 +648,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (EdgesMissing) continue; - Result.mergeIn(EdgeResult); + Result.mergeIn(EdgeResult, DL); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. @@ -696,7 +693,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, if (EdgesMissing) continue; - Result.mergeIn(EdgeResult); + Result.mergeIn(EdgeResult, DL); // If we hit overdefined, exit early. The BlockVals entry is already set // to overdefined. @@ -735,7 +732,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val, if (!AssumeVH) continue; auto *I = cast<CallInst>(AssumeVH); - if (!isValidAssumeForContext(I, BBI, DL, DT)) + if (!isValidAssumeForContext(I, BBI, DT)) continue; Value *C = I->getArgOperand(0); @@ -745,7 +742,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val, if (BBLV.isOverdefined()) BBLV = Result; else - BBLV.mergeIn(Result); + BBLV.mergeIn(Result, DL); } } } @@ -857,10 +854,10 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1)); if (CI && (ICI->getOperand(0) == Val || NegOffset)) { - // Calculate the range of values that would satisfy the comparison. + // Calculate the range of values that are allowed by the comparison ConstantRange CmpRange(CI->getValue()); ConstantRange TrueValues = - ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange); if (NegOffset) // Apply the offset from above. TrueValues = TrueValues.subtract(NegOffset->getValue()); @@ -1104,27 +1101,27 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, /// This lazily constructs the LazyValueInfoCache. static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC, - const DataLayout *DL = nullptr, + const DataLayout *DL, DominatorTree *DT = nullptr) { - if (!PImpl) - PImpl = new LazyValueInfoCache(AC, DL, DT); + if (!PImpl) { + assert(DL && "getCache() called with a null DataLayout"); + PImpl = new LazyValueInfoCache(AC, *DL, DT); + } return *static_cast<LazyValueInfoCache*>(PImpl); } bool LazyValueInfo::runOnFunction(Function &F) { AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + const DataLayout &DL = F.getParent()->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - - TLI = &getAnalysis<TargetLibraryInfo>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (PImpl) - getCache(PImpl, AC, DL, DT).clear(); + getCache(PImpl, AC, &DL, DT).clear(); // Fully lazy. 
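// An illustration of the makeAllowedICmpRegion change above: for a
// comparison "x ult 8" on i8, the allowed region is the half-open range
// [0, 8):
//   ConstantRange CmpRange(APInt(8, 8));
//   ConstantRange Allowed =
//       ConstantRange::makeAllowedICmpRegion(ICmpInst::ICMP_ULT, CmpRange);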
return false; @@ -1133,21 +1130,22 @@ bool LazyValueInfo::runOnFunction(Function &F) { void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { - delete &getCache(PImpl, AC); + delete &getCache(PImpl, AC, nullptr); PImpl = nullptr; } } Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, Instruction *CxtI) { + const DataLayout &DL = BB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1164,8 +1162,9 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { + const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1177,9 +1176,10 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, return nullptr; } -static LazyValueInfo::Tristate -getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, - const DataLayout *DL, TargetLibraryInfo *TLI) { +static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, + LVILatticeVal &Result, + const DataLayout &DL, + TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. 
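// A sketch of how a client (e.g. jump threading) uses the accessors above;
// the surrounding names are assumed, not part of this patch:
//   if (Constant *C = LVI->getConstantOnEdge(V, PredBB, SuccBB, CxtI))
//     ... V is known to equal C along the PredBB -> SuccBB edge ...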
Constant *Res = nullptr; @@ -1250,8 +1250,9 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { + const DataLayout &DL = FromBB->getModule()->getDataLayout(); LVILatticeVal Result = - getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } @@ -1259,18 +1260,23 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI) { - LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI); + const DataLayout &DL = CxtI->getModule()->getDataLayout(); + LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI); return getPredicateResult(Pred, C, Result, DL, TLI); } void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - if (PImpl) - getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + if (PImpl) { + const DataLayout &DL = PredBB->getModule()->getDataLayout(); + getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc); + } } void LazyValueInfo::eraseBlock(BasicBlock *BB) { - if (PImpl) - getCache(PImpl, AC, DL, DT).eraseBlock(BB); + if (PImpl) { + const DataLayout &DL = BB->getModule()->getDataLayout(); + getCache(PImpl, AC, &DL, DT).eraseBlock(BB); + } } diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp index 016f8c5..f6025e3 100644 --- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp @@ -36,7 +36,11 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); // Does not transform code } - +bool LibCallAliasAnalysis::runOnFunction(Function &F) { + // set up super class + InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); + return false; +} /// AnalyzeLibCallDetails - Given a call to a function with the specified /// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp index 23639e7..e98540b 100644 --- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp +++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -61,3 +62,29 @@ LibCallInfo::getFunctionInfo(const Function *F) const { return Map->lookup(F->getName()); } +/// See if the given exception handling personality function is one that we +/// understand. If so, return a description of it; otherwise return Unknown. 
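// For example, __gxx_personality_v0 classifies as EHPersonality::GNU_CXX
// and _except_handler3 as EHPersonality::MSVC_X86SEH; the latter is an
// asynchronous personality, so canSimplifyInvokeNoUnwind() below returns
// false for invokes under it.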
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) { + const Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return EHPersonality::Unknown; + return StringSwitch<EHPersonality>(F->getName()) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Default(EHPersonality::Unknown); +} + +bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) { + const LandingPadInst *LP = II->getLandingPadInst(); + EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn()); + // We can't simplify any invokes to nounwind functions if the personality + // function wants to catch asynch exceptions. The nounwind attribute only + // implies that the function does not throw synchronous exceptions. + return !isAsynchronousEHPersonality(Personality); +} diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index b5c7245..65a90d7 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -36,12 +36,14 @@ #include "llvm/Analysis/Lint.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -49,19 +51,18 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" -#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; namespace { namespace MemRef { - static unsigned Read = 1; - static unsigned Write = 2; - static unsigned Callee = 4; - static unsigned Branchee = 8; + static const unsigned Read = 1; + static const unsigned Write = 2; + static const unsigned Callee = 4; + static const unsigned Branchee = 8; } class Lint : public FunctionPass, public InstVisitor<Lint> { @@ -73,6 +74,8 @@ namespace { void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); void visitCallInst(CallInst &I); void visitInvokeInst(InvokeInst &I); @@ -95,8 +98,8 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, + Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; + Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const; public: @@ -104,7 +107,6 @@ namespace { AliasAnalysis *AA; AssumptionCache *AC; DominatorTree *DT; - const DataLayout *DL; TargetLibraryInfo *TLI; std::string Messages; @@ -121,32 +123,38 @@ namespace { AU.setPreservesAll(); 
AU.addRequired<AliasAnalysis>(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); } void print(raw_ostream &O, const Module *M) const override {} - void WriteValue(const Value *V) { - if (!V) return; - if (isa<Instruction>(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; + void WriteValues(ArrayRef<const Value *> Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; + } } } - // CheckFailed - A check failed, so print out the condition and the message - // that failed. This provides a nice place to put a breakpoint if you want - // to see why something is not correct. - void CheckFailed(const Twine &Message, - const Value *V1 = nullptr, const Value *V2 = nullptr, - const Value *V3 = nullptr, const Value *V4 = nullptr) { - MessagesStr << Message.str() << "\n"; - WriteValue(V1); - WriteValue(V2); - WriteValue(V3); - WriteValue(V4); + /// \brief A check failed, so print out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// \brief A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template <typename T1, typename... Ts> + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); } }; } @@ -155,23 +163,15 @@ char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, M) \ - do { if (!(C)) { CheckFailed(M); return; } } while (0) -#define Assert1(C, M, V1) \ - do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) -#define Assert2(C, M, V1, V2) \ - do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) -#define Assert3(C, M, V1, V2, V3) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) -#define Assert4(C, M, V1, V2, V3, V4) \ - do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) +#define Assert(C, ...) \ + do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0) // Lint::run - This is the main Analysis entry point for a // function. @@ -181,9 +181,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ?
&DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfo>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); @@ -193,8 +191,8 @@ bool Lint::runOnFunction(Function &F) { void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a // fairly common mistake to neglect to name a function. - Assert1(F.hasName() || F.hasLocalLinkage(), - "Unusual: Unnamed function with non-local linkage", &F); + Assert(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); // TODO: Check for irreducible control flow. } @@ -202,27 +200,30 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); + const DataLayout &DL = CS->getModule()->getDataLayout(); visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Callee); - if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { - Assert1(CS.getCallingConv() == F->getCallingConv(), - "Undefined behavior: Caller and callee calling convention differ", - &I); + if (Function *F = dyn_cast<Function>(findValue(Callee, DL, + /*OffsetOk=*/false))) { + Assert(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = CS.arg_size(); - Assert1(FT->isVarArg() ? - FT->getNumParams() <= NumActualArgs : - FT->getNumParams() == NumActualArgs, - "Undefined behavior: Call argument count mismatches callee " - "argument count", &I); + Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs + : FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", + &I); - Assert1(FT->getReturnType() == I.getType(), - "Undefined behavior: Call return type mismatches " - "callee return type", &I); + Assert(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", + &I); // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. @@ -232,9 +233,10 @@ void Lint::visitCallSite(CallSite CS) { Value *Actual = *AI; if (PI != PE) { Argument *Formal = PI++; - Assert1(Formal->getType() == Actual->getType(), - "Undefined behavior: Call argument type mismatches " - "callee parameter type", &I); + Assert(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", + &I); // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced @@ -243,9 +245,9 @@ void Lint::visitCallSite(CallSite CS) { for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); - Assert1(Result != AliasAnalysis::MustAlias && - Result != AliasAnalysis::PartialAlias, - "Unusual: noalias argument aliases another argument", &I); + Assert(Result != AliasAnalysis::MustAlias && + Result != AliasAnalysis::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); } // Check that an sret argument points to valid memory. 
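// For instance, the noalias check above fires when a callee declared as
//   declare void @f(i8* noalias, i8*)
// is passed the same pointer in both positions, since the two arguments
// then must-alias each other.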
@@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - DL ? DL->getABITypeAlignment(Ty) : 0, - Ty, MemRef::Read | MemRef::Write); + DL.getABITypeAlignment(Ty), Ty, + MemRef::Read | MemRef::Write); } } } @@ -263,10 +265,11 @@ void Lint::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = findValue(*AI, /*OffsetOk=*/true); - Assert1(!isa<AllocaInst>(Obj), - "Undefined behavior: Call with \"tail\" keyword references " - "alloca", &I); + Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); + Assert(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", + &I); } @@ -291,13 +294,13 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. uint64_t Size = 0; if (const ConstantInt *Len = - dyn_cast<ConstantInt>(findValue(MCI->getLength(), - /*OffsetOk=*/false))) + dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL, + /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); - Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != - AliasAnalysis::MustAlias, - "Undefined behavior: memcpy source and destination overlap", &I); + Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memmove: { @@ -321,9 +324,9 @@ void Lint::visitCallSite(CallSite CS) { } case Intrinsic::vastart: - Assert1(I.getParent()->getParent()->isVarArg(), - "Undefined behavior: va_start called in a non-varargs function", - &I); + Assert(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); @@ -346,6 +349,13 @@ void Lint::visitCallSite(CallSite CS) { visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; + + case Intrinsic::eh_begincatch: + visitEHBeginCatch(II); + break; + case Intrinsic::eh_endcatch: + visitEHEndCatch(II); + break; } } @@ -359,14 +369,13 @@ void Lint::visitInvokeInst(InvokeInst &I) { void Lint::visitReturnInst(ReturnInst &I) { Function *F = I.getParent()->getParent(); - Assert1(!F->doesNotReturn(), - "Unusual: Return statement in function with noreturn attribute", - &I); + Assert(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { - Value *Obj = findValue(V, /*OffsetOk=*/true); - Assert1(!isa<AllocaInst>(Obj), - "Unusual: Returning alloca value", &I); + Value *Obj = + findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); + Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); } } @@ -380,45 +389,47 @@ void Lint::visitMemoryReference(Instruction &I, if (Size == 0) return; - Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); - Assert1(!isa<ConstantPointerNull>(UnderlyingObject), - "Undefined behavior: Null pointer dereference", &I); - Assert1(!isa<UndefValue>(UnderlyingObject), - "Undefined behavior: Undef pointer dereference", &I); - Assert1(!isa<ConstantInt>(UnderlyingObject) || - 
!cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), - "Unusual: All-ones pointer dereference", &I); - Assert1(!isa<ConstantInt>(UnderlyingObject) || - !cast<ConstantInt>(UnderlyingObject)->isOne(), - "Unusual: Address one pointer dereference", &I); + Value *UnderlyingObject = + findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); + Assert(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Assert(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); if (Flags & MemRef::Write) { if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) - Assert1(!GV->isConstant(), - "Undefined behavior: Write to read-only memory", &I); - Assert1(!isa<Function>(UnderlyingObject) && - !isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Write to text section", &I); + Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", + &I); + Assert(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); } if (Flags & MemRef::Read) { - Assert1(!isa<Function>(UnderlyingObject), - "Unusual: Load from function body", &I); - Assert1(!isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Load from block address", &I); + Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", + &I); + Assert(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); } if (Flags & MemRef::Callee) { - Assert1(!isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Call to block address", &I); + Assert(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); } if (Flags & MemRef::Branchee) { - Assert1(!isa<Constant>(UnderlyingObject) || - isa<BlockAddress>(UnderlyingObject), - "Undefined behavior: Branch to non-blockaddress", &I); + Assert(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); } // Check for buffer overflows and misalignment. // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. + auto &DL = I.getModule()->getDataLayout(); int64_t Offset = 0; if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is @@ -429,37 +440,37 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { Type *ATy = AI->getAllocatedType(); - if (DL && !AI->isArrayAllocation() && ATy->isSized()) - BaseSize = DL->getTypeAllocSize(ATy); + if (!AI->isArrayAllocation() && ATy->isSized()) + BaseSize = DL.getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); - if (DL && BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL->getABITypeAlignment(ATy); + if (BaseAlign == 0 && ATy->isSized()) + BaseAlign = DL.getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. 
if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); - if (DL && GTy->isSized()) - BaseSize = DL->getTypeAllocSize(GTy); + if (GTy->isSized()) + BaseSize = DL.getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); - if (DL && BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL->getABITypeAlignment(GTy); + if (BaseAlign == 0 && GTy->isSized()) + BaseAlign = DL.getABITypeAlignment(GTy); } } // Accesses from before the start or after the end of the object are not // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); + Assert(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); // Accesses that say that the memory is more aligned than it is are not // defined. - if (DL && Align == 0 && Ty && Ty->isSized()) - Align = DL->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); + if (Align == 0 && Ty && Ty->isSized()) + Align = DL.getABITypeAlignment(Ty); + Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } @@ -477,39 +488,219 @@ void Lint::visitStoreInst(StoreInst &I) { } void Lint::visitXor(BinaryOperator &I) { - Assert1(!isa<UndefValue>(I.getOperand(0)) || - !isa<UndefValue>(I.getOperand(1)), - "Undefined result: xor(undef, undef)", &I); + Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); } void Lint::visitSub(BinaryOperator &I) { - Assert1(!isa<UndefValue>(I.getOperand(0)) || - !isa<UndefValue>(I.getOperand(1)), - "Undefined result: sub(undef, undef)", &I); + Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); } void Lint::visitLShr(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getOperand(1), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( + I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), - "Undefined result: Shift count out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( + I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + 
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool +allPredsCameFromLandingPad(BasicBlock *BB, + SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + if (BB->isLandingPad()) + return true; + // If we find a block with no predecessors, the search failed. + if (pred_empty(BB)) + return false; + for (BasicBlock *Pred : predecessors(BB)) { + if (VisitedBlocks.count(Pred)) + continue; + if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) + return false; + } + return true; +} + +static bool +allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, + IntrinsicInst **SecondBeginCatch, + SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { + IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); + if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) + return true; + // If we find another begincatch while looking for an endcatch, + // that's also an error. + if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { + *SecondBeginCatch = IC; + return false; + } + } + + // If we reach a block with no successors while searching, the + // search has failed. + if (succ_empty(BB)) + return false; + // Otherwise, search all of the successors. + for (BasicBlock *Succ : successors(BB)) { + if (VisitedBlocks.count(Succ)) + continue; + if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, + VisitedBlocks)) + return false; + } + return true; +} + +void Lint::visitEHBeginCatch(IntrinsicInst *II) { + // The checks in this function make a potentially dubious assumption about + // the CFG, namely that any block involved in a catch is only used for the + // catch. This will very likely be true of IR generated by a front end, + // but it may cease to be true, for example, if the IR is run through a + // pass which combines similar blocks. + // + // In general, if we encounter a block that isn't dominated by the catch + // block while we are searching the catch block's successors for a call + // to the end catch intrinsic, then it is possible that it will be legal for + // a path through this block to never reach a call to llvm.eh.endcatch. + // An analogous statement could be made about our search for a landing + // pad among the catch block's predecessors. + // + // What is actually required is that no path is possible at runtime that + // reaches a call to llvm.eh.begincatch without having previously visited + // a landingpad instruction and that no path is possible at runtime that + // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch + // (mentally adjusting for the fact that in reality these calls will be + // removed before code generation). + // + // Because this is a lint check, we take a pessimistic approach and warn if + // the control flow is potentially incorrect. + + SmallSet<BasicBlock *, 4> VisitedBlocks; + BasicBlock *CatchBB = II->getParent(); + + // The begin catch must occur in a landing pad block or all paths + // to it must have come from a landing pad. + Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), + "llvm.eh.begincatch may be reachable without passing a landingpad", + II); + + // Reset the visited block list. + VisitedBlocks.clear(); + + IntrinsicInst *SecondBeginCatch = nullptr; + + // This has to be called before it is asserted. Otherwise, the first assert + // below can never be hit.
+ bool EndCatchFound = allSuccessorsReachEndCatch( + CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), + &SecondBeginCatch, VisitedBlocks); + Assert( + SecondBeginCatch == nullptr, + "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", + II, SecondBeginCatch); + Assert(EndCatchFound, + "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", + II); +} + +static bool allPredCameFromBeginCatch( + BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, + IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { + VisitedBlocks.insert(BB); + // Look for a begincatch in this block. + for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; + ++RI) { + IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); + if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) + return true; + // If we find another end catch before we find a begin catch, that's + // an error. + if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { + *SecondEndCatch = IC; + return false; + } + // If we encounter a landingpad instruction, the search failed. + if (isa<LandingPadInst>(*RI)) + return false; + } + // If while searching we find a block with no predecessors, + // the search failed. + if (pred_empty(BB)) + return false; + // Search any predecessors we haven't seen before. + for (BasicBlock *Pred : predecessors(BB)) { + if (VisitedBlocks.count(Pred)) + continue; + if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, + VisitedBlocks)) + return false; + } + return true; +} + +void Lint::visitEHEndCatch(IntrinsicInst *II) { + // The check in this function makes a potentially dubious assumption about + // the CFG, namely that any block involved in a catch is only used for the + // catch. This will very likely be true of IR generated by a front end, + // but it may cease to be true, for example, if the IR is run through a + // pass which combines similar blocks. + // + // In general, if we encounter a block that isn't post-dominated by the + // end catch block while we are searching the end catch block's predecessors + // for a call to the begin catch intrinsic, then it is possible that it will + // be legal for a path to reach the end catch block without ever having + // called llvm.eh.begincatch. + // + // What is actually required is that no path is possible at runtime that + // reaches a call to llvm.eh.endcatch without having previously visited + // a call to llvm.eh.begincatch (mentally adjusting for the fact that in + // reality these calls will be removed before code generation). + // + // Because this is a lint check, we take a pessimistic approach and warn if + // the control flow is potentially incorrect. + + BasicBlock *EndCatchBB = II->getParent(); + + // All paths to the end catch call must pass through a begin catch call. + + // If llvm.eh.begincatch wasn't called in the current block, we'll use this + // helper to recursively look for it in predecessors. + SmallSet<BasicBlock *, 4> VisitedBlocks; + IntrinsicInst *SecondEndCatch = nullptr; + + // This has to be called before it is asserted. Otherwise, the first assert + // below can never be hit.
+ bool BeginCatchFound = + allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), + &SecondEndCatch, VisitedBlocks); + Assert( + SecondEndCatch == nullptr, + "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", + II, SecondEndCatch); + Assert(BeginCatchFound, + "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", + II); } -static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, +static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC) { // Assume undef could be zero. if (isa<UndefValue>(V)) @@ -550,30 +741,30 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, } void Lint::visitSDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL, DT, AC), - "Undefined behavior: Division by zero", &I); + Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), + "Undefined behavior: Division by zero", &I); } void Lint::visitAllocaInst(AllocaInst &I) { if (isa<ConstantInt>(I.getArraySize())) // This isn't undefined behavior, it's just an obvious pessimization. - Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), - "Pessimization: Static alloca outside of entry block", &I); + Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); // TODO: Check for an unusual size (MSB set?) 
} @@ -587,32 +778,33 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) { visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, nullptr, MemRef::Branchee); - Assert1(I.getNumDestinations() != 0, - "Undefined behavior: indirectbr with no destinations", &I); + Assert(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); } void Lint::visitExtractElementInst(ExtractElementInst &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), - /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), - "Undefined result: extractelement index out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getIndexOperand(), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); } void Lint::visitInsertElementInst(InsertElementInst &I) { - if (ConstantInt *CI = - dyn_cast<ConstantInt>(findValue(I.getOperand(2), - /*OffsetOk=*/false))) - Assert1(CI->getValue().ult(I.getType()->getNumElements()), - "Undefined result: insertelement index out of range", &I); + if (ConstantInt *CI = dyn_cast<ConstantInt>( + findValue(I.getOperand(2), I.getModule()->getDataLayout(), + /*OffsetOk=*/false))) + Assert(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); } void Lint::visitUnreachableInst(UnreachableInst &I) { // This isn't undefined behavior, it's merely suspicious. - Assert1(&I == I.getParent()->begin() || - std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), - "Unusual: unreachable immediately preceded by instruction without " - "side effects", &I); + Assert(&I == I.getParent()->begin() || + std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", + &I); } /// findValue - Look through bitcasts and simple memory reference patterns @@ -622,13 +814,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) { /// Most analysis passes don't require this logic, because instcombine /// will simplify most of these kinds of things away. But it's a goal of /// this Lint pass to be useful even on non-optimized IR. -Value *Lint::findValue(Value *V, bool OffsetOk) const { +Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const { SmallPtrSet<Value *, 4> Visited; - return findValueImpl(V, OffsetOk, Visited); + return findValueImpl(V, DL, OffsetOk, Visited); } /// findValueImpl - Implementation helper for findValue. -Value *Lint::findValueImpl(Value *V, bool OffsetOk, +Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, SmallPtrSetImpl<Value *> &Visited) const { // Detect self-referential values. 
if (!Visited.insert(V).second) @@ -649,7 +841,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, break; if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), BB, BBI, 6, AA)) - return findValueImpl(U, OffsetOk, Visited); + return findValueImpl(U, DL, OffsetOk, Visited); if (BBI != BB->begin()) break; BB = BB->getUniquePredecessor(); if (!BB) break; @@ -658,40 +850,38 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { if (Value *W = PN->hasConstantValue()) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { if (CI->isNoopCast(DL)) - return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { // Same as above, but for ConstantExpr instead of Instruction. if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), - CE->getOperand(0)->getType(), - CE->getType(), - DL ? DL->getIntPtrType(V->getType()) : - Type::getInt64Ty(V->getContext()))) - return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + CE->getOperand(0)->getType(), CE->getType(), + DL.getIntPtrType(V->getType()))) + return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } } // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) if (W != V) - return findValueImpl(W, OffsetOk, Visited); + return findValueImpl(W, DL, OffsetOk, Visited); } return V; @@ -711,7 +901,7 @@ void llvm::lintFunction(const Function &f) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot lint external functions"); - FunctionPassManager FPM(F.getParent()); + legacy::FunctionPassManager FPM(F.getParent()); Lint *V = new Lint(); FPM.add(V); FPM.run(F); @@ -720,7 +910,7 @@ void llvm::lintFunction(const Function &f) { /// lintModule - Check a module for errors, printing messages on stderr. 
/// void llvm::lintModule(const Module &M) { - PassManager PM; + legacy::PassManager PM; Lint *V = new Lint(); PM.add(V); PM.run(const_cast<Module&>(M)); diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 5042eb9..aed3b04 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" using namespace llvm; @@ -62,7 +63,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const DataLayout *DL) { + unsigned Align) { + const DataLayout &DL = ScanFrom->getModule()->getDataLayout(); int64_t ByteOffset = 0; Value *Base = V; Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); @@ -87,19 +89,19 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, } PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0; + uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); // If we found a base allocated type from either an alloca or global variable, // try to see if we are definitively within the allocated region. We need to // know the size of the base type and the loaded type to do anything in this - // case, so only try this when we have the DataLayout available. - if (BaseType && BaseType->isSized() && DL) { + // case. + if (BaseType && BaseType->isSized()) { if (BaseAlign == 0) - BaseAlign = DL->getPrefTypeAlignment(BaseType); + BaseAlign = DL.getPrefTypeAlignment(BaseType); if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. - if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) && + if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) return true; } @@ -133,16 +135,13 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, else continue; - // Handle trivial cases even w/o DataLayout or other work. + // Handle trivial cases. if (AccessedPtr == V) return true; - if (!DL) - continue; - auto *AccessedTy = cast<PointerType>(AccessedPtr->getType()); if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && - LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType())) + LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType())) return true; } return false; @@ -176,13 +175,10 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - // Try to get the DataLayout for this module. This may be null, in which case - // the optimizations will be limited. - const DataLayout *DL = ScanBB->getDataLayout(); + const DataLayout &DL = ScanBB->getModule()->getDataLayout(); // Try to get the store size for the type. - uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy) - : AA ? 
AA->getTypeStoreSize(AccessTy) : 0;
+ uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
new file mode 100644
index 0000000..b70de00
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -0,0 +1,1427 @@
+//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the loop memory dependence analysis that was
+// originally developed for the loop vectorizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-accesses"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave;
+
+static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
+ "runtime-memory-check-threshold", cl::Hidden,
+ cl::desc("When performing memory disambiguation checks at runtime do not "
+ "generate more than this number of comparisons (default = 8)."),
+ cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
+unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+/// \brief We collect interesting dependences up to this threshold.
+static cl::opt<unsigned> MaxInterestingDependence( + "max-interesting-dependences", cl::Hidden, + cl::desc("Maximum number of interesting dependences collected by " + "loop-access analysis (default = 100)"), + cl::init(100)); + +bool VectorizerParams::isInterleaveForced() { + return ::VectorizationInterleave.getNumOccurrences() > 0; +} + +void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, + const Function *TheFunction, + const Loop *TheLoop, + const char *PassName) { + DebugLoc DL = TheLoop->getStartLoc(); + if (const Instruction *I = Message.getInstr()) + DL = I->getDebugLoc(); + emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName, + *TheFunction, DL, Message.str()); +} + +Value *llvm::stripIntegerCast(Value *V) { + if (CastInst *CI = dyn_cast<CastInst>(V)) + if (CI->getOperand(0)->getType()->isIntegerTy()) + return CI->getOperand(0); + return V; +} + +const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, + const ValueToValueMap &PtrToStride, + Value *Ptr, Value *OrigPtr) { + + const SCEV *OrigSCEV = SE->getSCEV(Ptr); + + // If there is an entry in the map return the SCEV of the pointer with the + // symbolic stride replaced by one. + ValueToValueMap::const_iterator SI = + PtrToStride.find(OrigPtr ? OrigPtr : Ptr); + if (SI != PtrToStride.end()) { + Value *StrideVal = SI->second; + + // Strip casts. + StrideVal = stripIntegerCast(StrideVal); + + // Replace symbolic stride by one. + Value *One = ConstantInt::get(StrideVal->getType(), 1); + ValueToValueMap RewriteMap; + RewriteMap[StrideVal] = One; + + const SCEV *ByOne = + SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + << "\n"); + return ByOne; + } + + // Otherwise, just return the SCEV of the original pointer. + return SE->getSCEV(Ptr); +} + +void LoopAccessInfo::RuntimePointerCheck::insert( + ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, + unsigned ASId, const ValueToValueMap &Strides) { + // Get the stride replaced scev. + const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); + assert(AR && "Invalid addrec expression"); + const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + Pointers.push_back(Ptr); + Starts.push_back(AR->getStart()); + Ends.push_back(ScEnd); + IsWritePtr.push_back(WritePtr); + DependencySetId.push_back(DepSetId); + AliasSetId.push_back(ASId); +} + +bool LoopAccessInfo::RuntimePointerCheck::needsChecking( + unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { + // No need to check if two readonly pointers intersect. + if (!IsWritePtr[I] && !IsWritePtr[J]) + return false; + + // Only need to check pointers between two different dependency sets. + if (DependencySetId[I] == DependencySetId[J]) + return false; + + // Only need to check pointers in the same alias set. + if (AliasSetId[I] != AliasSetId[J]) + return false; + + // If PtrPartition is set omit checks between pointers of the same partition. + // Partition number -1 means that the pointer is used in multiple partitions. + // In this case we can't omit the check. 
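// Illustrative example of the partition rule (hypothetical values): with
// PtrPartition = {0, 0, 1}, the pair (0,1) shares partition 0 and needs no
// check, while (0,2) and (1,2) still do; a pointer assigned partition -1 is
// used by multiple partitions, so none of its pairs can be skipped.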
+ if (PtrPartition && (*PtrPartition)[I] != -1 &&
+ (*PtrPartition)[I] == (*PtrPartition)[J])
+ return false;
+
+ return true;
+}
+
+void LoopAccessInfo::RuntimePointerCheck::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+ if (NumPointers == 0)
+ return;
+
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ unsigned N = 0;
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition)) {
+ OS.indent(Depth) << N++ << ":\n";
+ OS.indent(Depth + 2) << *Pointers[I];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[I] << ")";
+ OS << "\n";
+ OS.indent(Depth + 2) << *Pointers[J];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[J] << ")";
+ OS << "\n";
+ }
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
+}
+
+namespace {
+/// \brief Analyses memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
+ MemoryDepChecker::DepCandidates &DA)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
+
+ /// \brief Register a store.
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ processMemAccesses();
+ }
+
+ bool isRTCheckNeeded() { return IsRTCheckNeeded; }
+
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+
+ /// We decided that no dependence analysis would be used. Reset the state.
+ void resetDepChecks(MemoryDepChecker &DepChecker) {
+ CheckDeps.clear();
+ DepChecker.clearInterestingDependences();
+ }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+
+ /// \brief Go over all memory accesses, check whether runtime pointer checks
+ /// are needed, and build sets of dependency check candidates.
+ void processMemAccesses();
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ const DataLayout &DL;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ /// An alias set tracker to partition the access set by underlying object and
+ /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
+ LoopInfo *LI;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+ /// underlying pointer. The set "CheckDeps" identifies which sets really
+ /// need a dependence check.
+ MemoryDepChecker::DepCandidates &DepCands;
+
+ bool IsRTCheckNeeded;
+};
+
+} // end anonymous namespace
+
+/// \brief Check whether a pointer can participate in a runtime bounds check.
+static bool hasComputableBounds(ScalarEvolution *SE,
+ const ValueToValueMap &Strides, Value *Ptr) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap);
+
+bool AccessAnalysis::canCheckPtrAtRT(
+ LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
+ ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+ NumComparisons = 0;
+
+ // We assign a consecutive id to accesses from different alias sets.
+ // Accesses between different groups don't need to be checked.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+ // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ CanDoRT = false;
+ }
+ }
+
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
+
+ ++ASId;
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check.
We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ return CanDoRT;
+}
+
+void AccessAnalysis::processMemAccesses() {
+ // We process the set twice: first we process read-write pointers, last we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
+
+ DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves use
+ // linked lists internally and so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto AV : AS) {
+ Value *Ptr = AV.getValue();
+
+ // For a single memory access in AliasSetTracker, Accesses may contain
+ // both read and write, and they both need to be handled for CheckDeps.
+ for (auto AC : S) {
+ if (AC.getPointer() != Ptr)
+ continue;
+
+ bool IsWrite = AC.getInt();
+
+ // If we're using the deferred access set, then it contains only
+ // reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a
+ // read or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in
+ // the first round (they need to be checked after we have seen all
+ // write pointers).
Note: we also mark pointers that are not
+ // consecutive as "read-only" pointers (so that we check
+ // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
+
+ // If this is a write, check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr - this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
+
+ if (IsWrite)
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
+ }
+ }
+ }
+ }
+ }
+}
+
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap) {
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+ << *Ptr << "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The access function must stride over the innermost loop.
+ if (Lp != AR->getLoop()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+ *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap by definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute and unit stride would have
+ // to access the pointer value "0" which is undefined behavior in address
+ // space 0, therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check that the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Calculate the pointer stride and check if it is consecutive.
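// Worked example (illustrative): for an i32 element type Size == 4, so a
// constant byte step of +4 yields Stride == 1 (consecutive), -4 yields
// Stride == -1 (reverse), and +8 yields Stride == 2; a step of +6 leaves
// Rem != 0 below and the access is rejected as non-strided.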
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
+
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case BackwardVectorizable:
+ return true;
+
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ return false;
+
+ case BackwardVectorizable:
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case ForwardButPreventsForwarding:
+ return false;
+
+ case Unknown:
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor, store-load forwarding does not take place.
+ // Positive dependences might cause trouble because vectorizing them might
+ // prevent store-load forwarding, making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
+ // hence on your typical architecture store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8 * TypeByteSize;
+ // Maximum vector factor.
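// Worked example (illustrative): with TypeByteSize == 4 and Distance == 12
// (the a[i-3] case above), the first candidate vf == 8 gives
// Distance % vf != 0 and Distance / vf == 1 < 32 cycles, so the safe VF is
// capped at 4 bytes and the distance is reported as a forwarding hazard.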
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectorizerParams::MaxVectorWidth * TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+ MaxVFWithoutSLForwardIssues = (vf >>= 1);
+ break;
+ }
+ }
+
+ if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+ DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
+
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
+ assert(AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return Dependence::NoDep;
+
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return Dependence::Unknown;
+
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+
+ int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative, we have to invert source and sink of
+ // the dependence.
+ if (StrideAPtr < 0) {
+ // Src = BScev;
+ // Sink = AScev;
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need consecutive accesses. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ return Dependence::Unknown;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return Dependence::Unknown;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
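// Illustrative example: for "A[i+1] = B[i]; C[i] = A[i];" the write in
// iteration i and the read in iteration i+1 hit the same element with the
// write first in program order, so the distance computed here is negative
// and the dependence is a forward one (safe unless the short distance
// defeats store-load forwarding, which the check below detects).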
+ const APInt &Val = C->getValue()->getValue();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
+ return Dependence::ForwardButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Dependence is negative: Forward\n");
+ return Dependence::Forward;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return Dependence::NoDep;
+ DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ return Dependence::Unknown;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LAA: ReadWrite-Write positive dependency with different types\n");
+ return Dependence::Unknown;
+ }
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
+ VectorizerParams::VectorizationFactor : 1);
+ unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
+ VectorizerParams::VectorizationInterleave : 1);
+
+ // The distance must be bigger than the size needed for a vectorized version
+ // of the operation, and the size of the vectorized operation must not be
+ // bigger than the current maximum size.
+ if (Distance < 2 * TypeByteSize ||
+ 2 * TypeByteSize > MaxSafeDepDistBytes ||
+ Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
+ DEBUG(dbgs() << "LAA: Failure because of Positive distance "
+ << Val.getSExtValue() << '\n');
+ return Dependence::Backward;
+ }
+
+ // Positive distance bigger than max vectorization factor.
+ MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
+ Distance : MaxSafeDepDistBytes;
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return Dependence::BackwardVectorizableButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+
+ return Dependence::BackwardVectorizable;
+}
+
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ const ValueToValueMap &Strides) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ auto A = std::make_pair(&*AI, *I1);
+ auto B = std::make_pair(&*OI, *I2);
+
+ assert(*I1 != *I2);
+ if (*I1 > *I2)
+ std::swap(A, B);
+
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+ // Gather dependences unless we accumulated MaxInterestingDependence
+ // dependences. In that case return as soon as we find the first
+ // unsafe dependence. This puts a limit on this quadratic
+ // algorithm.
+ if (RecordInterestingDependences) {
+ if (Dependence::isInterestingDependence(Type))
+ InterestingDependences.push_back(
+ Dependence(A.second, B.second, Type));
+
+ if (InterestingDependences.size() >= MaxInterestingDependence) {
+ RecordInterestingDependences = false;
+ InterestingDependences.clear();
+ DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ }
+ }
+ if (!RecordInterestingDependences && !SafeForVectorization)
+ return false;
+ }
+ ++OI;
+ }
+ ++AI;
+ }
+ }
+
+ DEBUG(dbgs() << "Total Interesting Dependences: "
+ << InterestingDependences.size() << "\n");
+ return SafeForVectorization;
+}
+
+SmallVector<Instruction *, 4>
+MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
+ MemAccessInfo Access(Ptr, isWrite);
+ auto &IndexVector = Accesses.find(Access)->second;
+
+ SmallVector<Instruction *, 4> Insts;
+ std::transform(IndexVector.begin(), IndexVector.end(),
+ std::back_inserter(Insts),
+ [&](unsigned Idx) { return this->InstMap[Idx]; });
+ return Insts;
+}
+
+const char *MemoryDepChecker::Dependence::DepName[] = {
+ "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
+ "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+
+void MemoryDepChecker::Dependence::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<Instruction *> &Instrs) const {
+ OS.indent(Depth) << DepName[Type] << ":\n";
+ OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
+ OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LAA: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
+
+ // We can only analyze innermost loops.
+ if (!TheLoop->empty()) {
+ DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
+ emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We only handle bottom-tested loops, i.e. loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
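// Illustrative example: a rotated, bottom-tested loop has the shape
//   do { body(i); ++i; } while (i < n);
// so the block holding the exit test is also the latch. An unrotated while
// loop, whose exit test sits in the header above the body, fails the
// getExitingBlock() == getLoopLatch() test below.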
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { + DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); + emitAnalysis( + LoopAccessReport() << + "loop control flow is not understood by analyzer"); + return false; + } + + // ScalarEvolution needs to be able to find the exit count. + const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); + if (ExitCount == SE->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() << + "could not determine number of loop iterations"); + DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); + return false; + } + + return true; +} + +void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { + + typedef SmallVector<Value*, 16> ValueVector; + typedef SmallPtrSet<Value*, 16> ValueSet; + + // Holds the Load and Store *instructions*. + ValueVector Loads; + ValueVector Stores; + + // Holds all the different accesses in the loop. + unsigned NumReads = 0; + unsigned NumReadWrites = 0; + + PtrRtCheck.Pointers.clear(); + PtrRtCheck.Need = false; + + const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + + // If this is a load, save it. If this instruction can read from memory + // but is not a load, then we quit. Notice that we don't handle function + // calls that read or write. + if (it->mayReadFromMemory()) { + // Many math library functions read the rounding mode. We will only + // vectorize a loop if it contains known function calls that don't set + // the flag. Therefore, it is safe to ignore this read from memory. + CallInst *Call = dyn_cast<CallInst>(it); + if (Call && getIntrinsicIDForCall(Call, TLI)) + continue; + + // If the function has an explicit vectorized counterpart, we can safely + // assume that it can be vectorized. + if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && + TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) + continue; + + LoadInst *Ld = dyn_cast<LoadInst>(it); + if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + emitAnalysis(LoopAccessReport(Ld) + << "read with atomic ordering or volatile read"); + DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); + CanVecMem = false; + return; + } + NumLoads++; + Loads.push_back(Ld); + DepChecker.addAccess(Ld); + continue; + } + + // Save 'store' instructions. Abort if other instructions write to memory. + if (it->mayWriteToMemory()) { + StoreInst *St = dyn_cast<StoreInst>(it); + if (!St) { + emitAnalysis(LoopAccessReport(it) << + "instruction cannot be vectorized"); + CanVecMem = false; + return; + } + if (!St->isSimple() && !IsAnnotatedParallel) { + emitAnalysis(LoopAccessReport(St) + << "write with atomic ordering or volatile write"); + DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); + CanVecMem = false; + return; + } + NumStores++; + Stores.push_back(St); + DepChecker.addAccess(St); + } + } // Next instr. + } // Next block. + + // Now we have two lists that hold the loads and the stores. + // Next, we find the pointers that they use. + + // Check if we see any stores. If there are no stores, then we don't + // care if the pointers are *restrict*. 
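// Illustrative example: a loop such as
//   for (int i = 0; i < n; ++i) sum += A[i] + B[i];
// (with sum kept in a register) performs no stores, so no pair of pointers
// can conflict and the loop is trivially safe.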
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ MemoryDepChecker::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
+ AA, LI, DependentAccesses);
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
+ // If we did *not* see this pointer before, insert it into the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr).second) {
+ ++NumReadWrites;
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
+ }
+ }
+
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ CanVecMem = true;
+ return;
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it into the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x, because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+ // other reads in this loop then it is safe to vectorize.
+ if (NumReadWrites == 1 && NumReads == 0) {
+ DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+ bool NeedRTCheck = Accesses.isRTCheckNeeded();
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
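// Illustrative example: a pointer whose SCEV is the affine AddRec
// {%A,+,4}<%loop> with a backedge-taken count of n-1 covers the byte range
// [%A, %A + 4*(n-1)] and therefore has computable bounds; an indirect
// access like A[B[i]] does not, and makes CanDoRT fail below.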
+ bool CanDoRT = false;
+ if (NeedRTCheck)
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+ Strides);
+
+ DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+ " pointer comparisons.\n");
+
+ // If we only have one set of dependences to check pointers among, we don't
+ // need a runtime check.
+ if (NumComparisons == 0 && NeedRTCheck)
+ NeedRTCheck = false;
+
+ // Check that we found the bounds for the pointer.
+ if (CanDoRT)
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ else if (NeedRTCheck) {
+ emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
+
+ CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks(DepChecker);
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, Strides, true);
+ // Check that we found the bounds for the pointer.
+ if (!CanDoRT && NumComparisons > 0) {
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
+ DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ CanVecMem = true;
+ }
+ }
+
+ if (CanVecMem)
+ DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (NeedRTCheck ? "" : " don't")
+ << " need a runtime memory check.\n");
+ else {
+ emitAnalysis(LoopAccessReport() <<
+ "unsafe dependent memory operations in loop");
+ DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ }
+}
+
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
+void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+ assert(!Report && "Multiple reports generated");
+ Report = Message;
+}
+
+bool LoopAccessInfo::isUniform(Value *V) const {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+// FIXME: this function is currently a duplicate of the one in
+// LoopVectorize.cpp.
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ?
I : nullptr;
+ return nullptr;
+}
+
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+ Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
+ if (!PtrRtCheck.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ unsigned NumPointers = PtrRtCheck.Pointers.size();
+ SmallVector<TrackingVH<Value>, 2> Starts;
+ SmallVector<TrackingVH<Value>, 2> Ends;
+
+ LLVMContext &Ctx = Loc->getContext();
+ SCEVExpander Exp(*SE, DL, "induction");
+ Instruction *FirstInst = nullptr;
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck.Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ continue;
+
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
+
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
+ // We have to do this trickery because the IRBuilder might fold the check to
+ // a constant expression in which case there is no Instruction anchored in
+ // the block.
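// Illustrative note: AND-ing the possibly folded result with "true" is a
// bitwise no-op, but BinaryOperator::CreateAnd always yields a real
// Instruction, so later code has a stable anchor even when every bound
// folded to a compile-time constant.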
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
+
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides)
+ : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
+ if (canAnalyzeLoop())
+ analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+ if (CanVecMem) {
+ if (PtrRtCheck.Need)
+ OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
+ }
+
+ if (Report)
+ OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+ if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
+ OS.indent(Depth) << "Interesting Dependences:\n";
+ for (auto &Dep : *InterestingDependences) {
+ Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
+ OS << "\n";
+ }
+ } else
+ OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+
+ // List the pairs of accesses that need run-time checks to prove
+ // independence.
+ PtrRtCheck.print(OS, Depth);
+ OS << "\n";
+
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+ auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+ assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+ "Symbolic strides changed for loop");
+#endif
+
+ if (!LAI) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
+ Strides);
+#ifndef NDEBUG
+ LAI->NumSymbolicStrides = Strides.size();
+#endif
+ }
+ return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+ LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+ ValueToValueMap NoSymbolicStrides;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ OS.indent(2) << L->getHeader()->getName() << ":\n";
+ auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+ LAI.print(OS, 4);
+ }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ?
&TLIP->getTLI() : nullptr; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + + return false; +} + +void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + + AU.setPreservesAll(); +} + +char LoopAccessAnalysis::ID = 0; +static const char laa_name[] = "Loop Access Analysis"; +#define LAA_NAME "loop-accesses" + +INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) + +namespace llvm { + Pass *createLAAPass() { + return new LoopAccessAnalysis(); + } +} diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index b1f62c4..6b6faf8 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -26,8 +26,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -45,11 +47,6 @@ static cl::opt<bool,true> VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), cl::desc("Verify loop info (time consuming)")); -char LoopInfo::ID = 0; -INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) - // Loop identifier metadata name. static const char *const LoopMDName = "llvm.loop"; @@ -59,20 +56,16 @@ static const char *const LoopMDName = "llvm.loop"; /// isLoopInvariant - Return true if the specified value is loop invariant /// -bool Loop::isLoopInvariant(Value *V) const { - if (Instruction *I = dyn_cast<Instruction>(V)) +bool Loop::isLoopInvariant(const Value *V) const { + if (const Instruction *I = dyn_cast<Instruction>(V)) return !contains(I); return true; // All non-instructions are loop invariant } /// hasLoopInvariantOperands - Return true if all the operands of the /// specified instruction are loop invariant. -bool Loop::hasLoopInvariantOperands(Instruction *I) const { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (!isLoopInvariant(I->getOperand(i))) - return false; - - return true; +bool Loop::hasLoopInvariantOperands(const Instruction *I) const { + return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); }); } /// makeLoopInvariant - If the given value is an instruction inside of the @@ -609,15 +602,6 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -//===----------------------------------------------------------------------===// -// LoopInfo implementation -// -bool LoopInfo::runOnFunction(Function &) { - releaseMemory(); - LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); - return false; -} - /// updateUnloop - The last backedge has been removed from a loop--now the /// "unloop". Find a new parent for the blocks contained within unloop and /// update the loop tree.
We don't necessarily have valid dominators at this @@ -631,7 +615,8 @@ void LoopInfo::updateUnloop(Loop *Unloop) { if (!Unloop->getParentLoop()) { // Since BBLoop had no parent, Unloop blocks are no longer in a loop. for (Loop::block_iterator I = Unloop->block_begin(), - E = Unloop->block_end(); I != E; ++I) { + E = Unloop->block_end(); + I != E; ++I) { // Don't reparent blocks in subloops. if (getLoopFor(*I) != Unloop) @@ -639,21 +624,21 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Blocks no longer have a parent but are still referenced by Unloop until // the Unloop object is deleted. - LI.changeLoopFor(*I, nullptr); + changeLoopFor(*I, nullptr); } // Remove the loop from the top-level LoopInfo object. - for (LoopInfo::iterator I = LI.begin();; ++I) { - assert(I != LI.end() && "Couldn't find loop"); + for (iterator I = begin();; ++I) { + assert(I != end() && "Couldn't find loop"); if (*I == Unloop) { - LI.removeLoop(I); + removeLoop(I); break; } } // Move all of the subloops to the top-level. while (!Unloop->empty()) - LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); + addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end()))); return; } @@ -680,35 +665,59 @@ void LoopInfo::updateUnloop(Loop *Unloop) { } } -void LoopInfo::verifyAnalysis() const { - // LoopInfo is a FunctionPass, but verifying every loop in the function - // each time verifyAnalysis is called is very expensive. The - // -verify-loop-info option can enable this. In order to perform some - // checking by default, LoopPass has been taught to call verifyLoop - // manually during loop pass sequences. +char LoopAnalysis::PassID; + +LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) { + // FIXME: Currently we create a LoopInfo from scratch for every function. + // This may prove to be too wasteful due to deallocating and re-allocating + // memory each time for the underlying map and vector data structures. At some + // point it may prove worthwhile to use a freelist and recycle LoopInfo + // objects. I don't want to add that kind of complexity until the scope of + // the problem is better understood. + LoopInfo LI; + LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F)); + return LI; +} - if (!VerifyLoopInfo) return; +PreservedAnalyses LoopPrinterPass::run(Function &F, + AnalysisManager<Function> *AM) { + AM->getResult<LoopAnalysis>(F).print(OS); + return PreservedAnalyses::all(); +} - DenseSet<const Loop*> Loops; - for (iterator I = begin(), E = end(); I != E; ++I) { - assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); - (*I)->verifyLoopNest(&Loops); - } +//===----------------------------------------------------------------------===// +// LoopInfo implementation +// - // Verify that blocks are mapped to valid loops.
- for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), - E = LI.BBMap.end(); I != E; ++I) { - assert(Loops.count(I->second) && "orphaned loop"); - assert(I->second->contains(I->first) && "orphaned block"); - } +char LoopInfoWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information", + true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information", + true, true) + +bool LoopInfoWrapperPass::runOnFunction(Function &) { + releaseMemory(); + LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); + return false; +} + +void LoopInfoWrapperPass::verifyAnalysis() const { + // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the + // function each time verifyAnalysis is called is very expensive. The + // -verify-loop-info option can enable this. In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop manually + // during loop pass sequences. + if (VerifyLoopInfo) + LI.verify(); } -void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { +void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<DominatorTreeWrapperPass>(); } -void LoopInfo::print(raw_ostream &OS, const Module*) const { +void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const { LI.print(OS); } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 190abc7..e9fcf02 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "loop-pass-manager" @@ -187,14 +188,15 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { // LPPassManager needs LoopInfo. In the long term LoopInfo class will // become part of LPPassManager. - Info.addRequired<LoopInfo>(); + Info.addRequired<LoopInfoWrapperPass>(); Info.setPreservesAll(); } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. bool LPPassManager::runOnFunction(Function &F) { - LI = &getAnalysis<LoopInfo>(); + auto &LIWP = getAnalysis<LoopInfoWrapperPass>(); + LI = &LIWP.getLoopInfo(); bool Changed = false; // Collect inherited analysis from Module level pass manager. @@ -262,7 +264,7 @@ bool LPPassManager::runOnFunction(Function &F) { // loop in the function every time. That level of checking can be // enabled with the -verify-loop-info option. { - TimeRegion PassTimer(getPassTimer(LI)); + TimeRegion PassTimer(getPassTimer(&LIWP)); CurrentLoop->verifyLoop(); } diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index ffc9fe6..da3b829 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -96,8 +96,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { // All this code uses non-const interfaces because MemDep is not // const-friendly, though nothing is actually modified. 
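Stepping back to the LoopInfo hunks above: the split into LoopInfoWrapperPass means every legacy-pass client updates mechanically, as LPPassManager just did. A sketch of the pattern for a hypothetical downstream pass (names assumed, not part of the patch):

    // Hypothetical legacy pass consuming LoopInfo through the new wrapper.
    void MyLoopClientPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.setPreservesAll();
    }

    bool MyLoopClientPass::runOnFunction(Function &F) {
      // The analysis result now lives inside the wrapper pass.
      LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
      for (Loop *TopLevelLoop : LI)
        (void)TopLevelLoop; // walk top-level loops as before
      return false;
    }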
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - Instruction *Inst = &*I; + for (auto &I : inst_range(F)) { + Instruction *Inst = &I; if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) continue; @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(nullptr))); - } else if (CallSite CS = cast<Value>(Inst)) { + } else if (auto CS = CallSite(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); @@ -135,8 +135,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { } void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { - for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) { - const Instruction *Inst = &*I; + for (const auto &I : inst_range(*F)) { + const Instruction *Inst = &I; DepSetMap::const_iterator DI = Deps.find(Inst); if (DI == Deps.end()) @@ -144,11 +144,10 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { const DepSet &InstDeps = DI->second; - for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); - I != E; ++I) { - const Instruction *DepInst = I->first.getPointer(); - DepType type = I->first.getInt(); - const BasicBlock *DepBB = I->second; + for (const auto &I : InstDeps) { + const Instruction *DepInst = I.first.getPointer(); + DepType type = I.first.getInt(); + const BasicBlock *DepBB = I.second; OS << " "; OS << DepTypeStr[type]; diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp new file mode 100644 index 0000000..fa292a2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -0,0 +1,70 @@ +//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct MemDerefPrinter : public FunctionPass { + SmallVector<Value *, 4> Vec; + + static char ID; // Pass identification, replacement for typeid + MemDerefPrinter() : FunctionPass(ID) { + initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnFunction(Function &F) override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void releaseMemory() override { + Vec.clear(); + } + }; +} + +char MemDerefPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs", + "Memory Dereferenceability of pointers in function", false, true) +INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs", + "Memory Dereferenceability of pointers in function", false, true) + +FunctionPass *llvm::createMemDerefPrinter() { + return new MemDerefPrinter(); +} + +bool MemDerefPrinter::runOnFunction(Function &F) { + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &I: inst_range(F)) { + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Value *PO = LI->getPointerOperand(); + if (isDereferenceablePointer(PO, DL)) + Vec.push_back(PO); + } + } + return false; +} + +void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const { + OS << "The following are dereferenceable:\n"; + for (auto &V: Vec) { + V->print(OS); + OS << "\n\n"; + } +} diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 08b41fe..8ddac8f 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" @@ -25,7 +26,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -206,7 +206,7 @@ const CallInst *llvm::extractMallocCall(const Value *I, return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr; } -static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, +static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) @@ -214,12 +214,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, // The size of the malloc's result type must be known to determine array size.
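The classification the new printer relies on is conservative; a rough standalone sketch of the easy cases isDereferenceablePointer accepts, with a hypothetical helper that is deliberately less precise than the real analysis:

    #include "llvm/IR/Argument.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Sketch: a few trivially dereferenceable cases; anything else (loaded
    // pointers, arbitrary GEPs, ...) is conservatively rejected here.
    static bool obviouslyDereferenceable(const Value *V) {
      if (isa<AllocaInst>(V))
        return true;                        // stack object of known size
      if (auto *A = dyn_cast<Argument>(V))
        return A->hasByValAttr();           // caller materialized the object
      if (auto *GV = dyn_cast<GlobalVariable>(V))
        return GV->hasDefinitiveInitializer();
      return false;
    }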
Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized() || !DL) + if (!T || !T->isSized()) return nullptr; - unsigned ElementSize = DL->getTypeAllocSize(T); + unsigned ElementSize = DL.getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) - ElementSize = DL->getStructLayout(ST)->getSizeInBytes(); + ElementSize = DL.getStructLayout(ST)->getSizeInBytes(); // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. @@ -232,23 +232,6 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, return nullptr; } -/// isArrayMalloc - Returns the corresponding CallInst if the instruction -/// is a call to malloc whose array size can be determined and the array size -/// is not constant 1. Otherwise, return NULL. -const CallInst *llvm::isArrayMalloc(const Value *I, - const DataLayout *DL, - const TargetLibraryInfo *TLI) { - const CallInst *CI = extractMallocCall(I, TLI); - Value *ArraySize = computeArraySize(CI, DL, TLI); - - if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize)) - if (ConstSize->isOne()) - return CI; - - // CI is a non-array malloc or we can't figure out that it is an array malloc. - return nullptr; -} - /// getMallocType - Returns the PointerType resulting from the malloc call. /// The PointerType depends on the number of bitcast uses of the malloc call: /// 0: PointerType is the call's return type. @@ -297,7 +280,7 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. -Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, +Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL, const TargetLibraryInfo *TLI, bool LookThroughSExt) { assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); @@ -319,7 +302,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { if (!CI || isa<IntrinsicInst>(CI)) return nullptr; Function *Callee = CI->getCalledFunction(); - if (Callee == nullptr || !Callee->isDeclaration()) + if (Callee == nullptr) return nullptr; StringRef FnName = Callee->getName(); @@ -367,11 +350,8 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { /// object size in Size if successful, and false otherwise. /// If RoundToAlign is true, then Size is rounded up to the alignment of allocas, /// byval arguments, and global variables. -bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL, +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, bool RoundToAlign) { - if (!DL) - return false; - ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign); SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); if (!Visitor.bothKnown(Data)) @@ -399,17 +379,17 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { return Size; } -ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign) -: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { + : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { // Pointer size must be rechecked for each object visited since it could have // a different address space.
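With DataLayout now passed by reference, the old "no target data" bail-out disappears from every caller of getObjectSize; a sketch of the updated call pattern, with the surrounding names (F, Ptr, TLI) assumed from a hypothetical caller:

    // DL is always available from the Module, so no nullptr check remains.
    uint64_t Size;
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (getObjectSize(Ptr, Size, DL, TLI, /*RoundToAlign=*/true))
      errs() << "known object size: " << Size << " bytes\n";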
} SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { - IntTyBits = DL->getPointerTypeSizeInBits(V->getType()); + IntTyBits = DL.getPointerTypeSizeInBits(V->getType()); Zero = APInt::getNullValue(IntTyBits); V = V->stripPointerCasts(); @@ -449,7 +429,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); - APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType())); + APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType())); if (!I.isArrayAllocation()) return std::make_pair(align(Size, I.getAlignment()), Zero); @@ -468,7 +448,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { return unknown(); } PointerType *PT = cast<PointerType>(A.getType()); - APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType())); + APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType())); return std::make_pair(align(Size, A.getParamAlignment()), Zero); } @@ -541,7 +521,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); APInt Offset(IntTyBits, 0); - if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset)) + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset)) return unknown(); return std::make_pair(PtrData.first, PtrData.second + Offset); @@ -557,7 +537,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); - APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType())); + APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType())); return std::make_pair(align(Size, GV.getAlignment()), Zero); } @@ -593,19 +573,18 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { return unknown(); } -ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, - const TargetLibraryInfo *TLI, - LLVMContext &Context, - bool RoundToAlign) -: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), - RoundToAlign(RoundToAlign) { +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( + const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, + bool RoundToAlign) + : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), + RoundToAlign(RoundToAlign) { // IntTy and Zero must be set for each compute() since the address space may // be different for later objects. } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { // XXX - Are vectors of pointers possible here? 
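Both the visitor and the evaluator hand back (size, offset) pairs measured in the pointer-width integer type; a small sketch of how a client could bounds-check an access with such a pair (helper and values hypothetical):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // An access of AccessSize bytes at Offset fits iff it ends by Size.
    static bool fitsInObject(const APInt &Size, const APInt &Offset,
                             uint64_t AccessSize) {
      return Offset.ule(Size) && (Size - Offset).uge(AccessSize);
    }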
- IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); Zero = ConstantInt::get(IntTy, 0); SizeOffsetEvalType Result = compute_(V); @@ -687,7 +666,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { assert(I.isArrayAllocation()); Value *ArraySize = I.getArraySize(); Value *Size = ConstantInt::get(ArraySize->getType(), - DL->getTypeAllocSize(I.getAllocatedType())); + DL.getTypeAllocSize(I.getAllocatedType())); Size = Builder.CreateMul(Size, ArraySize); return std::make_pair(Size, Zero); } @@ -739,7 +718,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { if (!bothKnown(PtrData)) return unknown(); - Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true); + Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true); Offset = Builder.CreateAdd(PtrData.second, Offset); return std::make_pair(PtrData.first, Offset); } diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c505aa4..3c1826a 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -65,7 +65,7 @@ INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) MemoryDependenceAnalysis::MemoryDependenceAnalysis() - : FunctionPass(ID), PredCache() { + : FunctionPass(ID) { initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); } MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { @@ -79,11 +79,9 @@ void MemoryDependenceAnalysis::releaseMemory() { ReverseLocalDeps.clear(); ReverseNonLocalDeps.clear(); ReverseNonLocalPtrDeps.clear(); - PredCache->clear(); + PredCache.clear(); } - - /// getAnalysisUsage - Does not modify anything. It uses Alias Analysis. /// void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { @@ -95,13 +93,9 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - if (!PredCache) - PredCache.reset(new PredIteratorCache()); return false; } @@ -227,7 +221,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, continue; } - if (CallSite InstCS = cast<Value>(Inst)) { + if (auto InstCS = CallSite(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. @@ -265,22 +259,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. -static bool -isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, - const Value *&MemLocBase, - int64_t &MemLocOffs, - const LoadInst *LI, - const DataLayout *DL) { - // If we have no target data, we can't do this. 
- if (!DL) return false; +static bool isLoadLoadClobberIfExtendedToFullWidth( + const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, + int64_t &MemLocOffs, const LoadInst *LI) { + const DataLayout &DL = LI->getModule()->getDataLayout(); // If we haven't already computed the base/offset of MemLoc, do so now. if (!MemLocBase) MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); - unsigned Size = MemoryDependenceAnalysis:: - getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, - LI, *DL); + unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize( + MemLocBase, MemLocOffs, MemLoc.Size, LI); return Size != 0; } @@ -291,23 +280,23 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, /// 2) safe for the target, and 3) would provide the specified memory /// location value, then this function returns the size in bytes of the /// load width to use. If not, this returns zero. -unsigned MemoryDependenceAnalysis:: -getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, - unsigned MemLocSize, const LoadInst *LI, - const DataLayout &DL) { +unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize( + const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, + const LoadInst *LI) { // We can only extend simple integer loads. if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; // Load widening is hostile to ThreadSanitizer: it may cause false positives // or make the reports more cryptic (access sizes are wrong). - if (LI->getParent()->getParent()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) + if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return 0; + const DataLayout &DL = LI->getModule()->getDataLayout(); + // Get the base of this load. int64_t LIOffs = 0; const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL); + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL); // If the two pointers are not based on the same pointer, we can't tell that // they are related. @@ -346,9 +335,9 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, !DL.fitsInLegalInteger(NewLoadByteSize*8)) return 0; - if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) + if (LIOffs + NewLoadByteSize > MemLocEnd && + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. @@ -362,6 +351,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, } } +static bool isVolatile(Instruction *Inst) { + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + return LI->isVolatile(); + else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return SI->isVolatile(); + else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) + return AI->isVolatile(); + return false; +} + + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. 
If isLoad is false, this routine ignores may-aliases @@ -405,14 +405,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // by every program that can detect any optimisation of that kind: either // it is racy (undefined) or there is a release followed by an acquire // between the pair of accesses under consideration. - bool HasSeenAcquire = false; + // If the load is invariant, we "know" that it doesn't alias *any* write. We + // do want to respect mustalias results since defs are useful for value + // forwarding, but any mayalias write can be assumed to be noalias. + // Arguably, this logic should be pushed inside AliasAnalysis itself. if (isLoad && QueryInst) { LoadInst *LI = dyn_cast<LoadInst>(QueryInst); if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) isInvariantLoad = true; } + const DataLayout &DL = BB->getModule()->getDataLayout(); + // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { Instruction *Inst = --ScanIt; @@ -448,14 +453,28 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // does not alias with when this atomic load indicates that another thread may // be accessing the location. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + + // While volatile accesses cannot be eliminated, they do not have to clobber + // non-aliasing locations, as normal accesses, for example, can be safely + // reordered with volatile accesses. + if (LI->isVolatile()) { + if (!QueryInst) + // Original QueryInst *may* be volatile + return MemDepResult::getClobber(LI); + if (isVolatile(QueryInst)) + // Ordering required if QueryInst is itself volatile + return MemDepResult::getClobber(LI); + // Otherwise, volatile doesn't imply any special ordering + } + + // Atomic loads have complications involved. // A Monotonic (or higher) load is OK if the query inst is itself not atomic. - // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any - // release store will know to return getClobber. // FIXME: This is overly conservative. - if (!LI->isUnordered()) { + if (LI->isAtomic() && LI->getOrdering() > Unordered) { if (!QueryInst) return MemDepResult::getClobber(LI); + if (LI->getOrdering() != Monotonic) + return MemDepResult::getClobber(LI); if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) { if (!QueryLI->isSimple()) return MemDepResult::getClobber(LI); @@ -465,18 +484,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } else if (QueryInst->mayReadOrWriteMemory()) { return MemDepResult::getClobber(LI); } - - if (isAtLeastAcquire(LI->getOrdering())) - HasSeenAcquire = true; } - // FIXME: this is overly conservative. - // While volatile access cannot be eliminated, they do not have to clobber - // non-aliasing locations, as normal accesses can for example be reordered - // with volatile accesses. - if (LI->isVolatile()) - return MemDepResult::getClobber(LI); - AliasAnalysis::Location LoadLoc = AA->getLocation(LI); // If we found a pointer, check if it could be the same as our pointer. @@ -490,12 +499,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // location is 1 byte at P+1). If so, return it as a load/load // clobber result, allowing the client to decide to widen the load if // it wants to.
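Condensed, the new volatile rule above reads: a volatile load clobbers only when the query is absent or itself volatile (using the patch's static isVolatile helper); otherwise volatility adds no ordering and the scan falls through to the plain alias checks. As a standalone paraphrase, not the patch's exact code:

    // Paraphrase of the volatile-load rule in getPointerDependencyFrom.
    static bool volatileLoadClobbers(LoadInst *LI, Instruction *QueryInst) {
      if (!LI->isVolatile())
        return false;                 // plain load: alias rules decide
      return !QueryInst ||            // unknown query may itself be volatile
             isVolatile(QueryInst);   // two volatile accesses must stay ordered
    }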
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) - if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { + if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() && isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, - MemLocOffset, LI, DL)) + MemLocOffset, LI)) return MemDepResult::getClobber(Inst); - + } continue; } @@ -534,12 +543,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Atomic stores have complications involved. // A Monotonic store is OK if the query inst is itself not atomic. - // A Release (or higher) store further requires that no acquire load - // has been seen. // FIXME: This is overly conservative. if (!SI->isUnordered()) { if (!QueryInst) return MemDepResult::getClobber(SI); + if (SI->getOrdering() != Monotonic) + return MemDepResult::getClobber(SI); if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) { if (!QueryLI->isSimple()) return MemDepResult::getClobber(SI); @@ -549,9 +558,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } else if (QueryInst->mayReadOrWriteMemory()) { return MemDepResult::getClobber(SI); } - - if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering())) - return MemDepResult::getClobber(SI); } // FIXME: this is overly conservative. @@ -597,6 +603,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; // Be conservative if the accessed pointer may alias the allocation. if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) return MemDepResult::getClobber(Inst); @@ -607,6 +615,9 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, continue; } + if (isInvariantLoad) + continue; + // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. @@ -757,8 +768,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { // Seed DirtyBlocks with each of the preds of QueryInst's block. BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); - for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) - DirtyBlocks.push_back(*PI); + for (BasicBlock *Pred : PredCache.get(QueryBB)) + DirtyBlocks.push_back(Pred); ++NumUncacheNonLocal; } @@ -843,8 +854,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. 
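The store case earlier in this hunk mirrors the load case: an ordered store clobbers unless it is merely Monotonic and the query is a simple load or store. A condensed paraphrase (hypothetical helper, not the patch's code):

    // Monotonic stores are tolerated by simple queries; anything stronger,
    // or any query that is not a simple load/store, is a clobber.
    static bool orderedStoreClobbers(StoreInst *SI, Instruction *QueryInst) {
      if (SI->isUnordered())
        return false;
      if (!QueryInst || SI->getOrdering() != Monotonic)
        return true;
      if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
        return !QueryLI->isSimple();
      if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
        return !QuerySI->isSimple();
      return QueryInst->mayReadOrWriteMemory();
    }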
- for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) - DirtyBlocks.push_back(*PI); + for (BasicBlock *Pred : PredCache.get(DirtyBB)) + DirtyBlocks.push_back(Pred); } } @@ -861,23 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - - auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) { - if (auto *I = dyn_cast<LoadInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<StoreInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<VAArgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) - return AA->getLocation(I); - else - llvm_unreachable("unsupported memory instruction"); - }; - - const AliasAnalysis::Location Loc = getLocation(AA, QueryInst); + const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); @@ -890,14 +885,7 @@ getNonLocalPointerDependency(Instruction *QueryInst, // Doing so would require piping through the QueryInst all the way through. // TODO: volatiles can't be elided, but they can be reordered with other // non-volatile accesses. - auto isVolatile = [](Instruction *Inst) { - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - return LI->isVolatile(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - return SI->isVolatile(); - } - return false; - }; + // We currently give up on any instruction which is ordered, but we do handle // atomic instructions which are unordered. // TODO: Handle ordered instructions @@ -915,8 +903,7 @@ getNonLocalPointerDependency(Instruction *QueryInst, const_cast<Value *>(Loc.Ptr))); return; } - - + const DataLayout &DL = FromBB->getModule()->getDataLayout(); PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC); // This is the set of blocks we've inspected, and the pointer we consider in @@ -924,7 +911,7 @@ getNonLocalPointerDependency(Instruction *QueryInst, // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); @@ -938,7 +925,8 @@ getNonLocalPointerDependency(Instruction *QueryInst, /// lookup (which may use dirty cache info if available). If we do a lookup, /// add the result to the cache. MemDepResult MemoryDependenceAnalysis:: -GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, +GetNonLocalInfoForBlock(Instruction *QueryInst, + const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { @@ -979,7 +967,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, } // Scan the block for the dependency. - MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, + QueryInst); // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. @@ -1052,7 +1041,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, /// not compute dependence information for some reason. 
This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, +getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepResult> &Result, @@ -1091,7 +1081,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, } else if (CacheInfo->Size > Loc.Size) { // This query's Size is less than the cached one. Conservatively restart // the query using the greater size. - return getNonLocalPointerDepFromBB(Pointer, + return getNonLocalPointerDepFromBB(QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, StartBB, Result, Visited, SkipFirstBlock); @@ -1111,7 +1101,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->NonLocalDeps.clear(); } if (Loc.AATags) - return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(), + return getNonLocalPointerDepFromBB(QueryInst, + Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, Visited, SkipFirstBlock); } @@ -1214,7 +1205,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, + Loc, isLoad, BB, Cache, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. @@ -1238,13 +1230,13 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (!Pointer.NeedsPHITranslationFromBlock(BB)) { SkipFirstBlock = false; SmallVector<BasicBlock*, 16> NewBlocks; - for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + for (BasicBlock *Pred : PredCache.get(BB)) { // Verify that we haven't looked at this block yet. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> - InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); + InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr())); if (InsertRes.second) { // First time we've looked at *PI. - NewBlocks.push_back(*PI); + NewBlocks.push_back(Pred); continue; } @@ -1280,8 +1272,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Cache = nullptr; PredList.clear(); - for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : PredCache.get(BB)) { PredList.push_back(std::make_pair(Pred, Pointer)); // Get the PHI translated pointer in this predecessor. This can fail if @@ -1348,7 +1339,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // result conflicted with the Visited list; we have to conservatively // assume it is unknown, but this also does not block PRE of the load. if (!CanTranslate || - getNonLocalPointerDepFromBB(PredPointer, + getNonLocalPointerDepFromBB(QueryInst, PredPointer, Loc.getWithNewPtr(PredPtrVal), isLoad, Pred, Result, Visited)) { @@ -1471,7 +1462,7 @@ void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { /// This needs to be done when the CFG changes, e.g., due to splitting /// critical edges. 
void MemoryDependenceAnalysis::invalidateCachedPredecessors() { - PredCache->clear(); + PredCache.clear(); } /// removeInstruction - Remove an instruction from the dependence analysis, diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index f645558..36c4714 100644 --- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -55,28 +55,72 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) { return false; } +static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory, + unsigned Line = 0) { + if (Filename.empty()) + return; + + O << " from "; + if (!Directory.empty()) + O << Directory << "/"; + O << Filename; + if (Line) + O << ":" << Line; +} + void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { - for (DICompileUnit CU : Finder.compile_units()) { - O << "Compile Unit: "; - CU.print(O); + // Printing the nodes directly isn't particularly helpful (since they + // reference other nodes that won't be printed, particularly for the + // filenames), so just print a few useful things. + for (DICompileUnit *CU : Finder.compile_units()) { + O << "Compile unit: "; + if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage())) + O << Lang; + else + O << "unknown-language(" << CU->getSourceLanguage() << ")"; + printFile(O, CU->getFilename(), CU->getDirectory()); O << '\n'; } - for (DISubprogram S : Finder.subprograms()) { - O << "Subprogram: "; - S.print(O); + for (DISubprogram *S : Finder.subprograms()) { + O << "Subprogram: " << S->getName(); + printFile(O, S->getFilename(), S->getDirectory(), S->getLine()); + if (!S->getLinkageName().empty()) + O << " ('" << S->getLinkageName() << "')"; O << '\n'; } - for (DIGlobalVariable GV : Finder.global_variables()) { - O << "GlobalVariable: "; - GV.print(O); + for (const DIGlobalVariable *GV : Finder.global_variables()) { + O << "Global variable: " << GV->getName(); + printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); + if (!GV->getLinkageName().empty()) + O << " ('" << GV->getLinkageName() << "')"; O << '\n'; } - for (DIType T : Finder.types()) { - O << "Type: "; - T.print(O); + for (const DIType *T : Finder.types()) { + O << "Type:"; + if (!T->getName().empty()) + O << ' ' << T->getName(); + printFile(O, T->getFilename(), T->getDirectory(), T->getLine()); + if (auto *BT = dyn_cast<DIBasicType>(T)) { + O << " "; + if (const char *Encoding = + dwarf::AttributeEncodingString(BT->getEncoding())) + O << Encoding; + else + O << "unknown-encoding(" << BT->getEncoding() << ')'; + } else { + O << ' '; + if (const char *Tag = dwarf::TagString(T->getTag())) + O << Tag; + else + O << "unknown-tag(" << T->getTag() << ")"; + } + if (auto *CT = dyn_cast<DICompositeType>(T)) { + if (auto *S = CT->getRawIdentifier()) + O << " (identifier: '" << S->getString() << "')"; + } O << '\n'; } } diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp index c214d3c..203e1da 100644 --- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" using namespace llvm; @@ -33,11 +34,11 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override {} - void initializePass() override { + bool doInitialization(Module &M) 
override { // Note: NoAA does not call InitializeAliasAnalysis because it's // special and does not support chaining. - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + DL = &M.getDataLayout(); + return true; } AliasResult alias(const Location &LocA, const Location &LocB) override { diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index a534418..177684f 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -404,10 +404,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, GEPOps.push_back(OpVal); } - GetElementPtrInst *Result = - GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); + GetElementPtrInst *Result = GetElementPtrInst::Create( + GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1), + InVal->getName() + ".phi.trans.insert", PredBB->getTerminator()); Result->setIsInBounds(GEP->isInBounds()); NewInsts.push_back(Result); return Result; diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp index 6fa7b2e..5e1cdd4 100644 --- a/contrib/llvm/lib/Analysis/RegionPass.cpp +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "regionpassmgr" @@ -83,9 +84,11 @@ bool RGPassManager::runOnFunction(Function &F) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { RegionPass *P = (RegionPass*)getContainedPass(Index); - dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG, - CurrentRegion->getNameStr()); - dumpRequiredSet(P); + if (isPassDebuggingExecutionsOrMore()) { + dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG, + CurrentRegion->getNameStr()); + dumpRequiredSet(P); + } initializeAnalysisImpl(P); @@ -96,11 +99,13 @@ bool RGPassManager::runOnFunction(Function &F) { Changed |= P->runOnRegion(CurrentRegion, *this); } - if (Changed) - dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, - skipThisRegion ? "<deleted>" : - CurrentRegion->getNameStr()); - dumpPreservedSet(P); + if (isPassDebuggingExecutionsOrMore()) { + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, + skipThisRegion ? "<deleted>" : + CurrentRegion->getNameStr()); + dumpPreservedSet(P); + } if (!skipThisRegion) { // Manually check that this region is still healthy. This is done @@ -120,8 +125,8 @@ bool RGPassManager::runOnFunction(Function &F) { removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, - skipThisRegion ? "<deleted>" : - CurrentRegion->getNameStr(), + (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ? 
+ "<deleted>" : CurrentRegion->getNameStr(), ON_REGION_MSG); if (skipThisRegion) @@ -194,7 +199,7 @@ public: bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; - for (const auto &BB : R->blocks()) { + for (const auto *BB : R->blocks()) { if (BB) BB->print(Out); else diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp index ad83113..d7f5109 100644 --- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo()); - for (const auto &BB : R.blocks()) + for (auto *BB : R.blocks()) if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB)) diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index ee42737..0e9f812 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -68,6 +68,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -87,7 +88,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -117,9 +117,9 @@ VerifySCEV("verify-scev", INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; @@ -726,6 +726,13 @@ public: return; } + // A simple case when N/1. The quotient is N. + if (Denominator->isOne()) { + *Quotient = Numerator; + *Remainder = D.Zero; + return; + } + // Split the Denominator when it is a product. if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) { const SCEV *Q, *R; @@ -788,6 +795,14 @@ public: assert(Numerator->isAffine() && "Numerator should be affine"); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + // Bail out if the types do not match. + Type *Ty = Denominator->getType(); + if (Ty != StartQ->getType() || Ty != StartR->getType() || + Ty != StepQ->getType() || Ty != StepR->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), @@ -1102,13 +1117,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getTruncateOrZeroExtend(SZ->getOperand(), Ty); // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can - // eliminate all the truncates. + // eliminate all the truncates, or we replace other casts with truncates. 
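The fold described above (and its multiply counterpart just below) is sound because truncation to n bits is a ring homomorphism: dropping high bits commutes with modular addition and multiplication. A quick self-contained check with hypothetical values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678, Y = 0xCAFEBABE;
      // trunc(X + Y) == trunc(X) + trunc(Y), and likewise for *, all mod 2^8.
      assert((uint8_t)(X + Y) == (uint8_t)((uint8_t)X + (uint8_t)Y));
      assert((uint8_t)(X * Y) == (uint8_t)((uint8_t)X * (uint8_t)Y));
      return 0;
    }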
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) { SmallVector<const SCEV *, 4> Operands; bool hasTrunc = false; for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); - hasTrunc = isa<SCEVTruncateExpr>(S); + if (!isa<SCEVCastExpr>(SA->getOperand(i))) + hasTrunc = isa<SCEVTruncateExpr>(S); Operands.push_back(S); } if (!hasTrunc) @@ -1117,13 +1133,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can - // eliminate all the truncates. + // eliminate all the truncates, or we replace other casts with truncates. if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) { SmallVector<const SCEV *, 4> Operands; bool hasTrunc = false; for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); - hasTrunc = isa<SCEVTruncateExpr>(S); + if (!isa<SCEVCastExpr>(SM->getOperand(i))) + hasTrunc = isa<SCEVTruncateExpr>(S); Operands.push_back(S); } if (!hasTrunc) @@ -1148,6 +1165,262 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return S; } +// Get the limit of a recurrence such that incrementing by Step cannot cause +// signed overflow as long as the value of the recurrence within the +// loop does not exceed this limit before incrementing. +static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + if (SE->isKnownPositive(Step)) { + *Pred = ICmpInst::ICMP_SLT; + return SE->getConstant(APInt::getSignedMinValue(BitWidth) - + SE->getSignedRange(Step).getSignedMax()); + } + if (SE->isKnownNegative(Step)) { + *Pred = ICmpInst::ICMP_SGT; + return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - + SE->getSignedRange(Step).getSignedMin()); + } + return nullptr; +} + +// Get the limit of a recurrence such that incrementing by Step cannot cause +// unsigned overflow as long as the value of the recurrence within the loop does +// not exceed this limit before incrementing. +static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + *Pred = ICmpInst::ICMP_ULT; + + return SE->getConstant(APInt::getMinValue(BitWidth) - + SE->getUnsignedRange(Step).getUnsignedMax()); +} + +namespace { + +struct ExtendOpTraitsBase { + typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *); +}; + +// Used to make code generic over signed and unsigned overflow. 
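The comment above introduces a small traits class; the generic-programming shape it relies on, stripped to its bones (all names in this sketch hypothetical):

    #include <cstdio>

    struct SignedTag {};
    struct UnsignedTag {};

    // Primary template left undefined; each tag supplies its own behavior.
    template <typename Tag> struct WrapTraits;
    template <> struct WrapTraits<SignedTag> {
      static const char *flagName() { return "nsw"; }
    };
    template <> struct WrapTraits<UnsignedTag> {
      static const char *flagName() { return "nuw"; }
    };

    // One algorithm, written once, dispatching through the trait -- the same
    // shape ExtendOpTraits gives the extend helpers that follow.
    template <typename Tag> void report() {
      std::printf("would prove %s\n", WrapTraits<Tag>::flagName());
    }

    int main() {
      report<SignedTag>();
      report<UnsignedTag>();
      return 0;
    }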
+template <typename ExtendOp> struct ExtendOpTraits { + // Members present: + // + // static const SCEV::NoWrapFlags WrapType; + // + // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; + // + // static const SCEV *getOverflowLimitForStep(const SCEV *Step, + // ICmpInst::Predicate *Pred, + // ScalarEvolution *SE); +}; + +template <> +struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase { + static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; + + static const GetExtendExprTy GetExtendExpr; + + static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + return getSignedOverflowLimitForStep(Step, Pred, SE); + } +}; + +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; + +template <> +struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { + static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; + + static const GetExtendExprTy GetExtendExpr; + + static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + return getUnsignedOverflowLimitForStep(Step, Pred, SE); + } +}; + +const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< + SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; +} + +// The recurrence AR has been shown to have no signed/unsigned wrap or something +// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as +// easily prove NSW/NUW for its preincrement or postincrement sibling. This +// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + +// Start),+,Step} => {(Step + sext/zext(Start)),+,Step}. As a result, the +// expression "Step + sext/zext(PreIncAR)" is congruent with +// "sext/zext(PostIncAR)" +template <typename ExtendOpTy> +static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, + ScalarEvolution *SE) { + auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; + auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; + + const Loop *L = AR->getLoop(); + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // Check for a simple looking step prior to loop entry. + const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); + if (!SA) + return nullptr; + + // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV + // subtraction is expensive. For this purpose, perform a quick and dirty + // difference, by checking for Step in the operand list. + SmallVector<const SCEV *, 4> DiffOps; + for (const SCEV *Op : SA->operands()) + if (Op != Step) + DiffOps.push_back(Op); + + if (DiffOps.size() == SA->getNumOperands()) + return nullptr; + + // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + + // `Step`: + + // 1. NSW/NUW flags on the step increment. + const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); + const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( + SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); + + // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies + // "S+X does not sign/unsign-overflow". + // + + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (PreAR && PreAR->getNoWrapFlags(WrapType) && + !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount)) + return PreStart; + + // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); + Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + const SCEV *OperandExtendedStart = + SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy), + (SE->*GetExtendExpr)(Step, WideTy)); + if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) { + if (PreAR && AR->getNoWrapFlags(WrapType)) { + // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW + // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then + // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. + const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType); + } + return PreStart; + } + + // 3. Loop precondition. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = + ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); + + if (OverflowLimit && + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + return PreStart; + } + return nullptr; +} + +// Get the normalized zero or sign extended expression for this AddRec's Start. +template <typename ExtendOpTy> +static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, + ScalarEvolution *SE) { + auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; + + const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE); + if (!PreStart) + return (SE->*GetExtendExpr)(AR->getStart(), Ty); + + return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty), + (SE->*GetExtendExpr)(PreStart, Ty)); +} + +// Try to prove away overflow by looking at "nearby" add recurrences. A +// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it +// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`. +// +// Formally: +// +// {S,+,X} == {S-T,+,X} + T +// => Ext({S,+,X}) == Ext({S-T,+,X} + T) +// +// If ({S-T,+,X} + T) does not overflow ... (1) +// +// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) +// +// If {S-T,+,X} does not overflow ... (2) +// +// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) +// == {Ext(S-T)+Ext(T),+,Ext(X)} +// +// If (S-T)+T does not overflow ... (3) +// +// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} +// == {Ext(S),+,Ext(X)} == LHS +// +// Thus, if (1), (2) and (3) are true for some T, then +// Ext({S,+,X}) == {Ext(S),+,Ext(X)} +// +// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) +// does not overflow" restricted to the 0th iteration. Therefore we only need +// to check for (1) and (2). +// +// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T +// is `Delta` (defined below). +// +template <typename ExtendOpTy> +bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, + const SCEV *Step, + const Loop *L) { + auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; + + // We restrict `Start` to a constant to prevent SCEV from spending too much + // time here. It is correct (but more expensive) to continue with a + // non-constant `Start` and do a general SCEV subtraction to compute + // `PreStart` below. + // + const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start); + if (!StartC) + return false; + + APInt StartAI = StartC->getValue()->getValue(); + + for (unsigned Delta : {-2, -1, 1, 2}) { + const SCEV *PreStart = getConstant(StartAI - Delta); + + // Give up if we don't already have the add recurrence we need because + // actually constructing an add recurrence is relatively expensive. 
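The derivation above can be spot-checked on its own motivating example in 8-bit arithmetic: with S = 1, X = 4 and T = 1, {1,+,4} equals {0,+,4} plus 1, and as long as neither the shifted recurrence nor the +1 wraps, zero-extension commutes with the recurrence. A small numeric illustration (standalone, values hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      // {1,+,4} == {0,+,4} + 1. For i in [0, 62], 4*i + 1 <= 249 < 256, so
      // conditions (1) and (2) hold and zext({1,+,4}) == {zext(1),+,zext(4)}.
      for (uint32_t i = 0; i <= 62; ++i) {
        uint8_t Narrow = (uint8_t)(1 + 4 * i);   // narrow recurrence
        uint16_t Wide = (uint16_t)(1 + 4 * i);   // recurrence in the wide type
        assert((uint16_t)Narrow == Wide);        // extension commutes: no wrap
      }
      return 0;
    }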
+ const SCEVAddRecExpr *PreAR = [&]() { + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddPointer(PreStart); + ID.AddPointer(Step); + ID.AddPointer(L); + void *IP = nullptr; + return static_cast<SCEVAddRecExpr *>( + this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + }(); + + if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) + const SCEV *DeltaS = getConstant(StartC->getType(), Delta); + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep( + DeltaS, &Pred, this); + if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) + return true; + } + } + + return false; +} + const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1201,9 +1474,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNUW)) - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1240,9 +1513,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. @@ -1255,9 +1528,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1275,9 +1548,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - @@ -1290,12 +1563,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. 
- return getAddRecExpr(getZeroExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } } + + if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); + return getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + } } // The cast wasn't folded; create an explicit cast node. @@ -1307,104 +1587,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } -// Get the limit of a recurrence such that incrementing by Step cannot cause -// signed overflow as long as the value of the recurrence within the loop does -// not exceed this limit before incrementing. -static const SCEV *getOverflowLimitForStep(const SCEV *Step, - ICmpInst::Predicate *Pred, - ScalarEvolution *SE) { - unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); - if (SE->isKnownPositive(Step)) { - *Pred = ICmpInst::ICMP_SLT; - return SE->getConstant(APInt::getSignedMinValue(BitWidth) - - SE->getSignedRange(Step).getSignedMax()); - } - if (SE->isKnownNegative(Step)) { - *Pred = ICmpInst::ICMP_SGT; - return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - - SE->getSignedRange(Step).getSignedMin()); - } - return nullptr; -} - -// The recurrence AR has been shown to have no signed wrap. Typically, if we can -// prove NSW for AR, then we can just as easily prove NSW for its preincrement -// or postincrement sibling. This allows normalizing a sign extended AddRec as -// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a -// result, the expression "Step + sext(PreIncAR)" is congruent with -// "sext(PostIncAR)" -static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, - Type *Ty, - ScalarEvolution *SE) { - const Loop *L = AR->getLoop(); - const SCEV *Start = AR->getStart(); - const SCEV *Step = AR->getStepRecurrence(*SE); - - // Check for a simple looking step prior to loop entry. - const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); - if (!SA) - return nullptr; - - // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV - // subtraction is expensive. For this purpose, perform a quick and dirty - // difference, by checking for Step in the operand list. - SmallVector<const SCEV *, 4> DiffOps; - for (const SCEV *Op : SA->operands()) - if (Op != Step) - DiffOps.push_back(Op); - - if (DiffOps.size() == SA->getNumOperands()) - return nullptr; - - // This is a postinc AR. Check for overflow on the preinc recurrence using the - // same three conditions that getSignExtendedExpr checks. - - // 1. NSW flags on the step increment. - const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); - const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( - SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); - - if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) - return PreStart; - - // 2. Direct overflow check on the step operation's expression. - unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); - Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); - const SCEV *OperandExtendedStart = - SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), - SE->getSignExtendExpr(Step, WideTy)); - if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) { - // Cache knowledge of PreAR NSW. 
- if (PreAR) - const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW); - // FIXME: this optimization needs a unit test - DEBUG(dbgs() << "SCEV: untested prestart overflow check\n"); - return PreStart; - } - - // 3. Loop precondition. - ICmpInst::Predicate Pred; - const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE); - - if (OverflowLimit && - SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { - return PreStart; - } - return nullptr; -} - -// Get the normalized sign-extended expression for this AddRec's Start. -static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, - Type *Ty, - ScalarEvolution *SE) { - const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); - if (!PreStart) - return SE->getSignExtendExpr(AR->getStart(), Ty); - - return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty), - SE->getSignExtendExpr(PreStart, Ty)); -} - const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1483,9 +1665,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNSW)) - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, SCEV::FlagNSW); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1522,9 +1704,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. @@ -1533,12 +1715,20 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); if (SAdd == OperandExtendedAdd) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + // If AR wraps around then + // + // abs(Step) * MaxBECount > unsigned-max(AR->getType()) + // => SAdd != OperandExtendedAdd + // + // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> + // (SAdd == OperandExtendedAdd => AR is NW) + + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); + // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1547,7 +1737,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // with the start value and the backedge is guarded by a comparison // with the post-inc value, the addrec is safe. 
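The entry-guard check that follows relies on getSignedOverflowLimitForStep:
for a known-positive step the limit is INT_MIN - smax(Step), taken modulo
2^BitWidth, paired with predicate SLT. A standalone check of that
arithmetic for i8 and a step of at most 2 (hypothetical numbers, not LLVM
code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int SMaxStep = 2;
      // -128 - 2 = -130 wraps to 126 in two's complement i8.
      const int8_t Limit = int8_t(INT8_MIN - SMaxStep);
      assert(Limit == 126);
      for (int V = INT8_MIN; V < Limit; ++V)   // every value slt the limit...
        for (int Step = 1; Step <= SMaxStep; ++Step)
          assert(V + Step <= INT8_MAX);        // ...increments without signed wrap
      return 0;
    }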
ICmpInst::Predicate Pred; - const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this); + const SCEV *OverflowLimit = + getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && @@ -1555,9 +1746,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, OverflowLimit)))) { // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } // If Start and Step are constants, check if we can apply this @@ -1576,6 +1767,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); } } + + if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) { + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + return getAddRecExpr( + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + } } // The cast wasn't folded; create an explicit cast node. @@ -2169,8 +2367,7 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) { if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) { const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr); - for (const SCEV *Operand : CurrentNAry->operands()) - Ops.push_back(Operand); + Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end()); } } return false; @@ -2729,6 +2926,56 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return S; } +const SCEV * +ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, + const SmallVectorImpl<const SCEV *> &IndexExprs, + bool InBounds) { + // getSCEV(Base)->getType() has the same address space as Base->getType() + // because SCEV::getType() preserves the address space. + Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); + // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP + // instruction to its SCEV, because the Instruction may be guarded by control + // flow and the no-overflow bits may not be valid for the expression in any + // context. + SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + // The address space is unimportant. The first thing we do on CurTy is getting + // its element type. + Type *CurTy = PointerType::getUnqual(PointeeType); + for (const SCEV *IndexExpr : IndexExprs) { + // Compute the (potentially symbolic) offset in bytes for this index. + if (StructType *STy = dyn_cast<StructType>(CurTy)) { + // For a struct, add the member offset. + ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); + unsigned FieldNo = Index->getZExtValue(); + const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); + + // Update CurTy to the type of the field at Index. + CurTy = STy->getTypeAtIndex(Index); + } else { + // Update CurTy to its element type. + CurTy = cast<SequentialType>(CurTy)->getElementType(); + // For an array, add the element offset, explicitly scaled. 
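The array branch in the lines that follow multiplies each sign-extended
index by the element size and accumulates the products into TotalOffset,
the usual byte-offset arithmetic for a GEP. A standalone model with
hypothetical sizes (LLVM would read them from DataLayout):

    #include <cassert>
    #include <cstdint>

    // Byte offset of "gep [10 x i32]* P, OuterIdx, Idx" under a typical layout.
    static int64_t gepOffset(int64_t OuterIdx, int64_t Idx) {
      const int64_t ArraySize = 40; // sizeof([10 x i32])
      const int64_t ElemSize = 4;   // sizeof(i32)
      return OuterIdx * ArraySize + Idx * ElemSize;
    }

    int main() {
      assert(gepOffset(0, 3) == 12); // &P[0][3] is 12 bytes past P
      assert(gepOffset(1, 0) == 40); // the outer index skips a whole array
      return 0;
    }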
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); + // Getelementptr indices are signed. + IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); + } + } + + // Add the total offset from all the GEP indices to the base. + return getAddExpr(BaseExpr, TotalOffset, Wrap); +} + const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector<const SCEV *, 2> Ops; @@ -2950,39 +3197,23 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { - // If we have DataLayout, we can bypass creating a target-independent + // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (DL) - return getConstant(IntTy, DL->getTypeAllocSize(AllocTy)); - - Constant *C = ConstantExpr::getSizeOf(AllocTy); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); - assert(Ty == IntTy && "Effective SCEV type doesn't match"); - return getTruncateOrZeroExtend(getSCEV(C), Ty); + return getConstant(IntTy, + F->getParent()->getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo) { - // If we have DataLayout, we can bypass creating a target-independent + // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (DL) { - return getConstant(IntTy, - DL->getStructLayout(STy)->getElementOffset(FieldNo)); - } - - Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) - C = Folded; - - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); + return getConstant( + IntTy, + F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset( + FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { @@ -3024,19 +3255,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { /// for which isSCEVable must return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - - // If we have a DataLayout, use it! - if (DL) - return DL->getTypeSizeInBits(Ty); - - // Integer types have fixed sizes. - if (Ty->isIntegerTy()) - return Ty->getPrimitiveSizeInBits(); - - // The only other support type is pointer. Without DataLayout, conservatively - // assume pointers are 64-bit. - assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); - return 64; + return F->getParent()->getDataLayout().getTypeSizeInBits(Ty); } /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -3052,12 +3271,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. 
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - - if (DL) - return DL->getIntPtrType(Ty); - - // Without DataLayout, conservatively assume pointers are 64-bit. - return Type::getInt64Ty(getContext()); + return F->getParent()->getDataLayout().getIntPtrType(Ty); } const SCEV *ScalarEvolution::getCouldNotCompute() { @@ -3444,10 +3658,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // If the increment doesn't overflow, then neither the addrec nor // the post-increment will overflow. if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); + if (OBO->getOperand(0) == PN) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { // If the increment is an inbounds GEP, then we know the address // space cannot be wrapped around. We cannot make any guarantee @@ -3455,7 +3671,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // unsigned but we may have a negative index from the base // pointer. We can guarantee that no unsigned wrap occurs if the // indices form a positive value. - if (GEP->isInBounds()) { + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { Flags = setFlags(Flags, SCEV::FlagNW); const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); @@ -3521,7 +3737,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC)) + if (Value *V = + SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3533,52 +3750,16 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// operations. This allows them to be analyzed by regular SCEV code. /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { - Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!Base->getType()->getPointerElementType()->isSized()) return getUnknown(GEP); - // Don't blindly transfer the inbounds flag from the GEP instruction to the - // Add expression, because the Instruction may be guarded by control flow - // and the no-overflow bits may not be valid for the expression in any - // context. - SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - - const SCEV *TotalOffset = getConstant(IntPtrTy, 0); - gep_type_iterator GTI = gep_type_begin(GEP); - for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()), - E = GEP->op_end(); - I != E; ++I) { - Value *Index = *I; - // Compute the (potentially symbolic) offset in bytes for this index. - if (StructType *STy = dyn_cast<StructType>(*GTI++)) { - // For a struct, add the member offset. - unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); - - // Add the field offset to the running total offset. - TotalOffset = getAddExpr(TotalOffset, FieldOffset); - } else { - // For an array, add the element offset, explicitly scaled. 
- const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI); - const SCEV *IndexS = getSCEV(Index); - // Getelementptr indices are signed. - IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); - - // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap); - - // Add the element offset to the running total offset. - TotalOffset = getAddExpr(TotalOffset, LocalOffset); - } - } - - // Get the SCEV for the GEP base. - const SCEV *BaseS = getSCEV(Base); - - // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, Wrap); + SmallVector<const SCEV *, 4> IndexExprs; + for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) + IndexExprs.push_back(getSCEV(*Index)); + return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs, + GEP->isInBounds()); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3653,7 +3834,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, + F->getParent()->getDataLayout(), 0, AC, nullptr, DT); return Zeros.countTrailingOnes(); } @@ -3688,79 +3870,93 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { return None; } -/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// getRange - Determine the range for a particular SCEV. If SignHint is +/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges +/// with a "cleaner" unsigned (resp. signed) representation. /// ConstantRange -ScalarEvolution::getUnsignedRange(const SCEV *S) { +ScalarEvolution::getRange(const SCEV *S, + ScalarEvolution::RangeSignHint SignHint) { + DenseMap<const SCEV *, ConstantRange> &Cache = + SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges + : SignedRanges; + // See if we've computed this range already. - DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S); - if (I != UnsignedRanges.end()) + DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S); + if (I != Cache.end()) return I->second; if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return setUnsignedRange(C, ConstantRange(C->getValue()->getValue())); + return setRange(C, SignHint, ConstantRange(C->getValue()->getValue())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); - // If the value has known zeros, the maximum unsigned value will have those - // known zeros as well. + // If the value has known zeros, the maximum value will have those known zeros + // as well. 
uint32_t TZ = GetMinTrailingZeros(S); - if (TZ != 0) - ConservativeResult = - ConstantRange(APInt::getMinValue(BitWidth), - APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); + if (TZ != 0) { + if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) + ConservativeResult = + ConstantRange(APInt::getMinValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); + else + ConservativeResult = ConstantRange( + APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + } if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - ConstantRange X = getUnsignedRange(Add->getOperand(0)); + ConstantRange X = getRange(Add->getOperand(0), SignHint); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getUnsignedRange(Add->getOperand(i))); - return setUnsignedRange(Add, ConservativeResult.intersectWith(X)); + X = X.add(getRange(Add->getOperand(i), SignHint)); + return setRange(Add, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { - ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + ConstantRange X = getRange(Mul->getOperand(0), SignHint); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) - X = X.multiply(getUnsignedRange(Mul->getOperand(i))); - return setUnsignedRange(Mul, ConservativeResult.intersectWith(X)); + X = X.multiply(getRange(Mul->getOperand(i), SignHint)); + return setRange(Mul, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { - ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + ConstantRange X = getRange(SMax->getOperand(0), SignHint); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) - X = X.smax(getUnsignedRange(SMax->getOperand(i))); - return setUnsignedRange(SMax, ConservativeResult.intersectWith(X)); + X = X.smax(getRange(SMax->getOperand(i), SignHint)); + return setRange(SMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { - ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + ConstantRange X = getRange(UMax->getOperand(0), SignHint); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) - X = X.umax(getUnsignedRange(UMax->getOperand(i))); - return setUnsignedRange(UMax, ConservativeResult.intersectWith(X)); + X = X.umax(getRange(UMax->getOperand(i), SignHint)); + return setRange(UMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { - ConstantRange X = getUnsignedRange(UDiv->getLHS()); - ConstantRange Y = getUnsignedRange(UDiv->getRHS()); - return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); + ConstantRange X = getRange(UDiv->getLHS(), SignHint); + ConstantRange Y = getRange(UDiv->getRHS(), SignHint); + return setRange(UDiv, SignHint, + ConservativeResult.intersectWith(X.udiv(Y))); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { - ConstantRange X = getUnsignedRange(ZExt->getOperand()); - return setUnsignedRange(ZExt, - ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + ConstantRange X = getRange(ZExt->getOperand(), SignHint); + return setRange(ZExt, SignHint, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { - ConstantRange X = getUnsignedRange(SExt->getOperand()); - return setUnsignedRange(SExt, - ConservativeResult.intersectWith(X.signExtend(BitWidth))); + ConstantRange X = 
getRange(SExt->getOperand(), SignHint); + return setRange(SExt, SignHint, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { - ConstantRange X = getUnsignedRange(Trunc->getOperand()); - return setUnsignedRange(Trunc, - ConservativeResult.intersectWith(X.truncate(BitWidth))); + ConstantRange X = getRange(Trunc->getOperand(), SignHint); + return setRange(Trunc, SignHint, + ConservativeResult.intersectWith(X.truncate(BitWidth))); } if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { @@ -3773,143 +3969,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { ConservativeResult.intersectWith( ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); - // TODO: non-affine addrec - if (AddRec->isAffine()) { - Type *Ty = AddRec->getType(); - const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); - if (!isa<SCEVCouldNotCompute>(MaxBECount) && - getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { - MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); - - const SCEV *Start = AddRec->getStart(); - const SCEV *Step = AddRec->getStepRecurrence(*this); - - ConstantRange StartRange = getUnsignedRange(Start); - ConstantRange StepRange = getSignedRange(Step); - ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - ConstantRange EndRange = - StartRange.add(MaxBECountRange.multiply(StepRange)); - - // Check for overflow. This must be done with ConstantRange arithmetic - // because we could be called from within the ScalarEvolution overflow - // checking code. - ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); - ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); - ConstantRange ExtMaxBECountRange = - MaxBECountRange.zextOrTrunc(BitWidth*2+1); - ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); - if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != - ExtEndRange) - return setUnsignedRange(AddRec, ConservativeResult); - - APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), - EndRange.getUnsignedMin()); - APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), - EndRange.getUnsignedMax()); - if (Min.isMinValue() && Max.isMaxValue()) - return setUnsignedRange(AddRec, ConservativeResult); - return setUnsignedRange(AddRec, - ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); - } - } - - return setUnsignedRange(AddRec, ConservativeResult); - } - - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { - // Check if the IR explicitly contains !range metadata. - Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue()); - if (MDRange.hasValue()) - ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue()); - - // For a SCEVUnknown, ask ValueTracking. - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); - if (Ones == ~Zeros + 1) - return setUnsignedRange(U, ConservativeResult); - return setUnsignedRange(U, - ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1))); - } - - return setUnsignedRange(S, ConservativeResult); -} - -/// getSignedRange - Determine the signed range for a particular SCEV. -/// -ConstantRange -ScalarEvolution::getSignedRange(const SCEV *S) { - // See if we've computed this range already. 
- DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S); - if (I != SignedRanges.end()) - return I->second; - - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) - return setSignedRange(C, ConstantRange(C->getValue()->getValue())); - - unsigned BitWidth = getTypeSizeInBits(S->getType()); - ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); - - // If the value has known zeros, the maximum signed value will have those - // known zeros as well. - uint32_t TZ = GetMinTrailingZeros(S); - if (TZ != 0) - ConservativeResult = - ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); - - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - ConstantRange X = getSignedRange(Add->getOperand(0)); - for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getSignedRange(Add->getOperand(i))); - return setSignedRange(Add, ConservativeResult.intersectWith(X)); - } - - if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { - ConstantRange X = getSignedRange(Mul->getOperand(0)); - for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) - X = X.multiply(getSignedRange(Mul->getOperand(i))); - return setSignedRange(Mul, ConservativeResult.intersectWith(X)); - } - - if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { - ConstantRange X = getSignedRange(SMax->getOperand(0)); - for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) - X = X.smax(getSignedRange(SMax->getOperand(i))); - return setSignedRange(SMax, ConservativeResult.intersectWith(X)); - } - - if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { - ConstantRange X = getSignedRange(UMax->getOperand(0)); - for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) - X = X.umax(getSignedRange(UMax->getOperand(i))); - return setSignedRange(UMax, ConservativeResult.intersectWith(X)); - } - - if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { - ConstantRange X = getSignedRange(UDiv->getLHS()); - ConstantRange Y = getSignedRange(UDiv->getRHS()); - return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); - } - - if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { - ConstantRange X = getSignedRange(ZExt->getOperand()); - return setSignedRange(ZExt, - ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); - } - - if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { - ConstantRange X = getSignedRange(SExt->getOperand()); - return setSignedRange(SExt, - ConservativeResult.intersectWith(X.signExtend(BitWidth))); - } - - if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { - ConstantRange X = getSignedRange(Trunc->getOperand()); - return setSignedRange(Trunc, - ConservativeResult.intersectWith(X.truncate(BitWidth))); - } - - if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) { @@ -3935,41 +3994,66 @@ ScalarEvolution::getSignedRange(const SCEV *S) { const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. 
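A worked instance of the computation that follows (made-up values, not
LLVM code): for an i8 addrec {0,+,2} with MaxBECount = 10, the end value
is 0 + 10 * 2 = 20; redoing the sum in a wider type shows no wrap, so the
unsigned range sharpens to [0, 21):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Start = 0, Step = 2, MaxBECount = 10;
      uint8_t End = Start + MaxBECount * Step;
      uint32_t WideEnd = uint32_t(Start) + uint32_t(MaxBECount) * Step;
      assert(End == WideEnd); // the widened overflow check passes
      assert(End == 20);      // range is [min(0,20), max(0,20) + 1)
      return 0;
    }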
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange ZExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1); const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*this); + ConstantRange StepSRange = getSignedRange(Step); + ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1); + + ConstantRange StartURange = getUnsignedRange(Start); + ConstantRange EndURange = + StartURange.add(MaxBECountRange.multiply(StepSRange)); + + // Check for unsigned overflow. + ConstantRange ZExtStartURange = + StartURange.zextOrTrunc(BitWidth * 2 + 1); + ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1); + if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == + ZExtEndURange) { + APInt Min = APIntOps::umin(StartURange.getUnsignedMin(), + EndURange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartURange.getUnsignedMax(), + EndURange.getUnsignedMax()); + bool IsFullRange = Min.isMinValue() && Max.isMaxValue(); + if (!IsFullRange) + ConservativeResult = + ConservativeResult.intersectWith(ConstantRange(Min, Max + 1)); + } - ConstantRange StartRange = getSignedRange(Start); - ConstantRange StepRange = getSignedRange(Step); - ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - ConstantRange EndRange = - StartRange.add(MaxBECountRange.multiply(StepRange)); - - // Check for overflow. This must be done with ConstantRange arithmetic - // because we could be called from within the ScalarEvolution overflow - // checking code. - ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); - ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); - ConstantRange ExtMaxBECountRange = - MaxBECountRange.zextOrTrunc(BitWidth*2+1); - ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); - if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != - ExtEndRange) - return setSignedRange(AddRec, ConservativeResult); - - APInt Min = APIntOps::smin(StartRange.getSignedMin(), - EndRange.getSignedMin()); - APInt Max = APIntOps::smax(StartRange.getSignedMax(), - EndRange.getSignedMax()); - if (Min.isMinSignedValue() && Max.isMaxSignedValue()) - return setSignedRange(AddRec, ConservativeResult); - return setSignedRange(AddRec, - ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); + ConstantRange StartSRange = getSignedRange(Start); + ConstantRange EndSRange = + StartSRange.add(MaxBECountRange.multiply(StepSRange)); + + // Check for signed overflow. This must be done with ConstantRange + // arithmetic because we could be called from within the ScalarEvolution + // overflow checking code. 
+ ConstantRange SExtStartSRange = + StartSRange.sextOrTrunc(BitWidth * 2 + 1); + ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1); + if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == + SExtEndSRange) { + APInt Min = APIntOps::smin(StartSRange.getSignedMin(), + EndSRange.getSignedMin()); + APInt Max = APIntOps::smax(StartSRange.getSignedMax(), + EndSRange.getSignedMax()); + bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue(); + if (!IsFullRange) + ConservativeResult = + ConservativeResult.intersectWith(ConstantRange(Min, Max + 1)); + } } } - return setSignedRange(AddRec, ConservativeResult); + return setRange(AddRec, SignHint, ConservativeResult); } if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { @@ -3978,18 +4062,31 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (MDRange.hasValue()) ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue()); - // For a SCEVUnknown, ask ValueTracking. - if (!U->getValue()->getType()->isIntegerTy() && !DL) - return setSignedRange(U, ConservativeResult); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); - if (NS <= 1) - return setSignedRange(U, ConservativeResult); - return setSignedRange(U, ConservativeResult.intersectWith( - ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), - APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1))); + // Split here to avoid paying the compile-time cost of calling both + // computeKnownBits and ComputeNumSignBits. This restriction can be lifted + // if needed. + const DataLayout &DL = F->getParent()->getDataLayout(); + if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { + // For a SCEVUnknown, ask ValueTracking. + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); + if (Ones != ~Zeros + 1) + ConservativeResult = + ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + } else { + assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && + "generalize as needed!"); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); + if (NS > 1) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1)); + } + + return setRange(U, SignHint, ConservativeResult); } - return setSignedRange(S, ConservativeResult); + return setRange(S, SignHint, ConservativeResult); } /// createSCEV - We know that there is no SCEV for the specified value. @@ -4088,8 +4185,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC, - nullptr, DT); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, + F->getParent()->getDataLayout(), 0, AC, nullptr, DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4280,9 +4377,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case ICmpInst::ICMP_SGE: // a >s b ? a+x : b+x -> smax(a, b)+x // a >s b ? 
b+x : a+x -> smin(a, b)+x - if (LHS->getType() == U->getType()) { - const SCEV *LS = getSCEV(LHS); - const SCEV *RS = getSCEV(RHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType())) { + const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); + const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4303,9 +4401,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case ICmpInst::ICMP_UGE: // a >u b ? a+x : b+x -> umax(a, b)+x // a >u b ? b+x : a+x -> umin(a, b)+x - if (LHS->getType() == U->getType()) { - const SCEV *LS = getSCEV(LHS); - const SCEV *RS = getSCEV(RHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType())) { + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); + const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4320,11 +4419,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case ICmpInst::ICMP_NE: // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (LHS->getType() == U->getType() && - isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(LHS->getType(), 1); - const SCEV *LS = getSCEV(LHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(U->getType(), 1); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, LS); @@ -4335,11 +4434,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case ICmpInst::ICMP_EQ: // n == 0 ? 1+x : n+x -> umax(n, 1)+x - if (LHS->getType() == U->getType() && - isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(LHS->getType(), 1); - const SCEV *LS = getSCEV(LHS); + if (getTypeSizeInBits(LHS->getType()) <= + getTypeSizeInBits(U->getType()) && + isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(U->getType(), 1); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); const SCEV *LA = getSCEV(U->getOperand(1)); const SCEV *RA = getSCEV(U->getOperand(2)); const SCEV *LDiff = getMinusSCEV(LA, One); @@ -5238,12 +5337,9 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) { if (!L->contains(I)) return false; if (isa<PHINode>(I)) { - if (L->getHeader() == I->getParent()) - return true; - else - // We don't currently keep track of the control flow needed to evaluate - // PHIs, so we cannot handle PHIs inside of loops. - return false; + // We don't currently keep track of the control flow needed to evaluate + // PHIs, so we cannot handle PHIs inside of loops. + return L->getHeader() == I->getParent(); } // If we won't be able to constant fold this expression even if the operands @@ -5314,7 +5410,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap<Instruction *, Constant *> &Vals, - const DataLayout *DL, + const DataLayout &DL, const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. 
if (Constant *C = dyn_cast<Constant>(V)) return C; @@ -5403,6 +5499,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; + const DataLayout &DL = F->getParent()->getDataLayout(); for (; ; ++IterationNum) { if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! @@ -5410,8 +5507,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap<Instruction *, Constant *> NextIterVals; - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, - TLI); + Constant *NextPHI = + EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -5487,12 +5584,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + const DataLayout &DL = F->getParent()->getDataLayout(); for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ - ConstantInt *CondVal = - dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals, - DL, TLI)); + ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>( + EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -5623,7 +5719,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); - C = ConstantExpr::getGetElementPtr(C, C2); + C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); } else C = ConstantExpr::getAdd(C, C2); } @@ -5725,16 +5821,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { Constant *C = nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], DL, - TLI); + C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], + Operands[1], DL, TLI); else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else - C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, DL, TLI); + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, + DL, TLI); if (!C) return V; return getSCEV(C); } @@ -6016,7 +6112,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { - if (CB->getZExtValue() == false) + if (!CB->getZExtValue()) std::swap(R1, R2); // R1 is the minimum root now. 
// We can only use this value if the chrec ends up with an exact zero @@ -6631,6 +6727,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } + struct ClearWalkingBEDominatingCondsOnExit { + ScalarEvolution &SE; + + explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) + : SE(SE){}; + + ~ClearWalkingBEDominatingCondsOnExit() { + SE.WalkingBEDominatingConds = false; + } + }; + + // We don't want more than one activation of the following loop on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + WalkingBEDominatingConds = true; + ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); + + // If the loop is not reachable from the entry block, we risk running into an + // infinite loop as we walk up into the dom tree. These loops do not matter + // anyway, so we just return a conservative answer when we see them. + if (!DT->isReachableFromEntry(L->getHeader())) + return false; + + for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; + DTN != HeaderDTN; + DTN = DTN->getIDom()) { + + assert(DTN && "should reach the loop header before reaching the root!"); + + BasicBlock *BB = DTN->getBlock(); + BasicBlock *PBB = BB->getSinglePredecessor(); + if (!PBB) + continue; + + BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!ContinuePredicate || !ContinuePredicate->isConditional()) + continue; + + Value *Condition = ContinuePredicate->getCondition(); + + // If we have an edge `E` within the loop body that dominates the only + // latch, the condition guarding `E` also guards the backedge. This + // reasoning works only for loops with a single latch. + + BasicBlockEdge DominatingEdge(PBB, BB); + if (DominatingEdge.isSingleEdge()) { + // We're constructively (and conservatively) enumerating edges within the + // loop body that dominate the latch. The dominator tree better agree + // with us on this: + assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + + if (isImpliedCond(Pred, LHS, RHS, Condition, + BB != ContinuePredicate->getSuccessor(0))) + return true; + } + } + return false; } @@ -6726,15 +6881,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); if (!ICI) return false; - // Bail if the ICmp's operands' types are wider than the needed type - // before attempting to call getSCEV on them. This avoids infinite - // recursion, since the analysis of widening casts can require loop - // exit condition information for overflow checking, which would - // lead back here. - if (getTypeSizeInBits(LHS->getType()) < - getTypeSizeInBits(ICI->getOperand(0)->getType())) - return false; - // Now that we found a conditional branch that dominates the loop or controls // the loop latch. Check to see if it is the comparison we are looking for. ICmpInst::Predicate FoundPred; @@ -6746,9 +6892,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - // Balance the types. The case where FoundLHS' type is wider than - // LHS' type is checked for above. - if (getTypeSizeInBits(LHS->getType()) > + // Balance the types. 
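The widening below has to follow the signedness of the predicate being
proved: extending a signed comparison with zext (or vice versa) can change
its truth value. A standalone illustration with made-up i8 operands (not
LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t A = -1, B = 0;
      assert(A < B);                   // i8: -1 slt 0
      assert(int16_t(A) < int16_t(B)); // sext preserves slt
      assert(!(uint16_t(uint8_t(A)) < uint16_t(uint8_t(B)))); // zext flips it
      return 0;
    }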
+  if (getTypeSizeInBits(LHS->getType()) <
+      getTypeSizeInBits(FoundLHS->getType())) {
+    if (CmpInst::isSigned(Pred)) {
+      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+    } else {
+      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+    }
+  } else if (getTypeSizeInBits(LHS->getType()) >
             getTypeSizeInBits(FoundLHS->getType())) {
     if (CmpInst::isSigned(FoundPred)) {
       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
@@ -6874,6 +7028,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
                                             const SCEV *FoundRHS) {
+  if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                      FoundLHS, FoundRHS) ||
          // ~x < ~y --> x > y
@@ -7011,8 +7168,49 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   return false;
 }
 
-// Verify if an linear IV with positive stride can overflow when in a
-// less-than comparison, knowing the invariant term of the comparison, the
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+                                                     const SCEV *LHS,
+                                                     const SCEV *RHS,
+                                                     const SCEV *FoundLHS,
+                                                     const SCEV *FoundRHS) {
+  if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+    // The restriction on `FoundRHS` can be lifted easily -- it exists only to
+    // reduce the compile time impact of this optimization.
+    return false;
+
+  const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+  if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+      !isa<SCEVConstant>(AddLHS->getOperand(0)))
+    return false;
+
+  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+
+  // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+  // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+  ConstantRange FoundLHSRange =
+      ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+  // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+  // for `LHS`:
+  APInt Addend =
+      cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+  ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+  // We can also compute the range of values for `LHS` that satisfy the
+  // consequent, "`LHS` `Pred` `RHS`":
+  APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+  ConstantRange SatisfyingLHSRange =
+      ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+  // The antecedent implies the consequent if every value of `LHS` that
+  // satisfies the antecedent also satisfies the consequent.
+  return SatisfyingLHSRange.contains(LHSRange);
+}
+
+// Verify if a linear IV with positive stride can overflow when in a
+// less-than comparison, knowing the invariant term of the comparison, the
 // stride and the knowledge of NSW/NUW flags on the recurrence.
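A concrete reading of the test inside doesIVOverflowOnLT below
(hypothetical i8 numbers, not LLVM code): with an unsigned stride of at
most 10, the IV of a less-than loop cannot wrap as long as the bound RHS
never exceeds 255 - 9 = 246, because the worst case is one maximal stride
taken from just below the bound:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t MaxStride = 10, MaxRHS = 246;
      // Worst case: IV sits just below RHS, then takes one maximal stride.
      uint16_t Worst = uint16_t(MaxRHS - 1) + MaxStride;
      assert(Worst <= 0xFF); // still representable: no unsigned wrap
      return 0;
    }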
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
                                         bool IsSigned, bool NoWrap) {
@@ -7040,7 +7238,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
   return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
 }
 
-// Verify if an linear IV with negative stride can overflow when in a
+// Verify if a linear IV with negative stride can overflow when in a
 // greater-than comparison, knowing the invariant term of the comparison,
 // the stride and the knowledge of NSW/NUW flags on the recurrence.
 bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -7071,7 +7269,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
 
 // Compute the backedge taken count knowing the interval difference, the
 // stride and presence of the equality in the comparison.
-const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, 
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
                                             bool Equality) {
   const SCEV *One = getConstant(Step->getType(), 1);
   Delta = Equality ? getAddExpr(Delta, Step)
@@ -7111,7 +7309,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 
   // Avoid proven overflow cases: this will ensure that the backedge taken count
   // will not generate any unsigned overflow. Relaxed no-overflow conditions
-  // exploit NoWrapFlags, allowing to optimize in presence of undefined 
+  // exploit NoWrapFlags, allowing to optimize in presence of undefined
   // behaviors like the case of C language.
   if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
     return getCouldNotCompute();
@@ -7191,7 +7389,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
 
   // Avoid proven overflow cases: this will ensure that the backedge taken count
   // will not generate any unsigned overflow. Relaxed no-overflow conditions
-  // exploit NoWrapFlags, allowing to optimize in presence of undefined 
+  // exploit NoWrapFlags, allowing to optimize in presence of undefined
   // behaviors like the case of C language.
   if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
     return getCouldNotCompute();
@@ -7239,7 +7437,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
   if (isa<SCEVConstant>(BECount))
     MaxBECount = BECount;
   else
-    MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), 
+    MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
                                 getConstant(MinStride), false);
 
   if (isa<SCEVCouldNotCompute>(MaxBECount))
@@ -7339,7 +7537,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
       if (ConstantInt *CB =
          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                                      R1->getValue(),
                                                      R2->getValue()))) {
-        if (CB->getZExtValue() == false)
+        if (!CB->getZExtValue())
          std::swap(R1, R2); // R1 is the minimum root now.
 
        // Make sure the root is not off by one.
The returned iteration should @@ -7858,18 +8056,16 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), - BlockDispositions(64), FirstUnknown(nullptr) { + : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), + LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - LI = &getAnalysis<LoopInfo>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - TLI = &getAnalysis<TargetLibraryInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); return false; } @@ -7892,6 +8088,7 @@ void ScalarEvolution::releaseMemory() { } assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); @@ -7907,9 +8104,9 @@ void ScalarEvolution::releaseMemory() { void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<LoopInfoWrapperPass>(); AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -7969,6 +8166,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { OS << " --> "; const SCEV *SV = SE.getSCEV(&*I); SV->print(OS); + if (!isa<SCEVCouldNotCompute>(SV)) { + OS << " U: "; + SE.getUnsignedRange(SV).print(OS); + OS << " S: "; + SE.getSignedRange(SV).print(OS); + } const Loop *L = LI->getLoopFor((*I).getParent()); @@ -7976,6 +8179,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { if (AtUse != SV) { OS << " --> "; AtUse->print(OS); + if (!isa<SCEVCouldNotCompute>(AtUse)) { + OS << " U: "; + SE.getUnsignedRange(AtUse).print(OS); + OS << " S: "; + SE.getSignedRange(AtUse).print(OS); + } } if (L) { @@ -8000,17 +8209,17 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { - SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == L) - return Values[u].second; + auto &Values = LoopDispositions[S]; + for (auto &V : Values) { + if (V.getPointer() == L) + return V.getInt(); } - Values.push_back(std::make_pair(L, LoopVariant)); + Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); - SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == L) { - Values2[u - 1].second = D; + auto &Values2 = LoopDispositions[S]; + for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + if (V.getPointer() == L) { + V.setInt(D); break; } } @@ 
-8106,17 +8315,17 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { - SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == BB) - return Values[u].second; + auto &Values = BlockDispositions[S]; + for (auto &V : Values) { + if (V.getPointer() == BB) + return V.getInt(); } - Values.push_back(std::make_pair(BB, DoesNotDominateBlock)); + Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); - SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == BB) { - Values2[u - 1].second = D; + auto &Values2 = BlockDispositions[S]; + for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + if (V.getPointer() == BB) { + V.setInt(D); break; } } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 5c339ee..ccec0a8 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" using namespace llvm; @@ -79,7 +80,7 @@ ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) { - InitializeAliasAnalysis(this); + InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); SE = &getAnalysis<ScalarEvolution>(); return false; } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 7e9e351..0bd427b 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -23,7 +23,9 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -204,11 +206,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(const SCEV *&S, - const SCEV *&Remainder, - const SCEV *Factor, - ScalarEvolution &SE, - const DataLayout *DL) { +static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, + const SCEV *Factor, ScalarEvolution &SE, + const DataLayout &DL) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -248,35 +248,17 @@ static bool FactorOutConstant(const SCEV *&S, // In a Mul, check if there is a constant operand which is a multiple // of the given factor. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { - if (DL) { - // With DataLayout, the size is known. Check if there is a constant - // operand which is a multiple of the given factor. If so, we can - // factor it. 
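Aside: the FactorOutConstant hunk here drops the no-DataLayout branch and keeps only the constant case: when the leading constant operand of a Mul is a multiple of the factor, divide it through. A hypothetical integer-only sketch of that rewrite (plain C++, not LLVM API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// If the leading constant of a product is a multiple of `factor`, divide it
// through and report success: the analogue of rewriting (C * X) as
// factor * ((C / factor) * X) when C % factor == 0.
static bool factorOutConstant(std::vector<int64_t> &mulOps, int64_t factor) {
  if (factor == 1)
    return true; // everything is divisible by one
  if (!mulOps.empty() && mulOps[0] % factor == 0) {
    mulOps[0] /= factor; // srem/sdiv pair from the real code
    return true;
  }
  return false;
}

int main() {
  std::vector<int64_t> ops = {12, 7}; // stands in for the SCEV 12 * %x
  if (factorOutConstant(ops, 4))
    printf("factored: %lld * %%x, times 4\n", (long long)ops[0]); // 3 * %x
}
```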
- const SCEVConstant *FC = cast<SCEVConstant>(Factor); - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) - if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[0] = - SE.getConstant(C->getValue()->getValue().sdiv( - FC->getValue()->getValue())); - S = SE.getMulExpr(NewMulOps); - return true; - } - } else { - // Without DataLayout, check if Factor can be factored out of any of the - // Mul's operands. If so, we can just remove it. - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); - if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) && - Remainder->isZero()) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[i] = SOp; - S = SE.getMulExpr(NewMulOps); - return true; - } + // Size is known, check if there is a constant operand which is a multiple + // of the given factor. If so, we can factor it. + const SCEVConstant *FC = cast<SCEVConstant>(Factor); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = SE.getConstant( + C->getValue()->getValue().sdiv(FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; } - } } // In an AddRec, check if both start and step are divisible. @@ -393,7 +375,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, PointerType *PTy, Type *Ty, Value *V) { - Type *ElTy = PTy->getElementType(); + Type *OriginalElTy = PTy->getElementType(); + Type *ElTy = OriginalElTy; SmallVector<Value *, 4> GepIndices; SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -402,9 +385,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - Type *IntPtrTy = SE.DL - ? SE.DL->getIntPtrType(PTy) - : Type::getInt64Ty(PTy->getContext()); + Type *IntPtrTy = DL.getIntPtrType(PTy); // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP @@ -422,7 +403,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *Op = Ops[i]; const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) { + if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); if (!Remainder->isZero()) @@ -456,43 +437,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; - if (SE.DL) { - // With DataLayout, field offsets are known. See if a constant offset - // falls within any of the struct fields. 
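Aside: the surviving branch below leans on StructLayout to map a constant byte offset to the field containing it, keeping the leftover intra-field offset as the new Ops[0]. A sketch with hand-rolled field offsets (names are illustrative, not LLVM API):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Given sorted field byte offsets (what StructLayout stores), find the
// field containing `offset`, like getElementContainingOffset.
static unsigned elementContainingOffset(const std::vector<uint64_t> &offsets,
                                        uint64_t offset) {
  auto it = std::upper_bound(offsets.begin(), offsets.end(), offset);
  return unsigned(it - offsets.begin()) - 1; // last field starting <= offset
}

int main() {
  // struct { int32 a; int64 b; int8 c; } with natural alignment:
  std::vector<uint64_t> offsets = {0, 8, 16};
  unsigned idx = elementContainingOffset(offsets, 10);
  printf("offset 10 -> field %u\n", idx);                       // field 1
  printf("intra-field remainder: %llu\n",
         (unsigned long long)(10 - offsets[idx]));              // 2 -> Ops[0]
}
```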
- if (Ops.empty()) break; - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) - if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *SE.DL->getStructLayout(STy); - uint64_t FullOffset = C->getValue()->getZExtValue(); - if (FullOffset < SL.getSizeInBytes()) { - unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); - ElTy = STy->getTypeAtIndex(ElIdx); - Ops[0] = + // Field offsets are known. See if a constant offset falls within any of + // the struct fields. + if (Ops.empty()) + break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *DL.getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); - AnyNonZeroIndices = true; - FoundFieldNo = true; - } - } - } else { - // Without DataLayout, just check for an offsetof expression of the - // appropriate struct type. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { - Type *CTy; - Constant *FieldNo; - if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { - GepIndices.push_back(FieldNo); - ElTy = - STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); - Ops[i] = SE.getConstant(Ty, 0); - AnyNonZeroIndices = true; - FoundFieldNo = true; - break; - } + AnyNonZeroIndices = true; + FoundFieldNo = true; } - } + } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously // be folded away). @@ -526,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, CRHS); + return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), + CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -561,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. 
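Aside: the CreateGEP call sites in the hunk below move to overloads that take the pointee type explicitly instead of reading it off the pointer. A minimal builder sketch, written against an LLVM of roughly this era (headers and small API details may differ in newer releases):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gep-demo", Ctx);
  IRBuilder<> B(Ctx);

  // i8* offset(i8* %base, i64 %idx)
  Type *I8Ptr = PointerType::getUnqual(B.getInt8Ty());
  auto *FTy = FunctionType::get(I8Ptr, {I8Ptr, B.getInt64Ty()}, false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "offset", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  auto AI = F->arg_begin();
  Value *Base = &*AI++;
  Value *Idx = &*AI;
  // The element type (i8 here) is now an explicit argument:
  Value *GEP = B.CreateGEP(B.getInt8Ty(), Base, Idx, "uglygep");
  B.CreateRet(GEP);

  M.print(outs(), nullptr);
}
```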
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); rememberInstruction(GEP); return GEP; @@ -597,7 +561,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(Casted, + Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); @@ -1063,6 +1027,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE, return false; } +static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), + SE.getSignExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + +static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), + SE.getZeroExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. @@ -1188,6 +1180,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if + // we actually do emit an addition. It does not apply if we emit a + // subtraction. + bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); + bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); + // Create the PHI. BasicBlock *Header = L->getHeader(); Builder.SetInsertPoint(Header, Header->begin()); @@ -1213,10 +1211,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa<OverflowingBinaryOperator>(IncV)) { - if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + if (IncrementIsNUW) cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); - if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + if (IncrementIsNSW) cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); } PN->addIncoming(IncV, Pred); @@ -1711,7 +1710,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
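Aside: IsIncrementNSW/IsIncrementNUW above decide the flag symbolically, by asking ScalarEvolution whether extending before or after the add yields the same SCEV. The concrete-value intuition, shrunk to 8 bits (two's complement):

```cpp
#include <cstdint>
#include <cstdio>

// An 8-bit `a + s` is nsw exactly when doing the add in a doubled width
// agrees with sign-extending the 8-bit result afterwards:
//   sext(a + s) == sext(a) + sext(s)
static bool incrementIsNSW(int8_t a, int8_t s) {
  int16_t extendAfterOp = (int16_t)(int8_t)(a + s); // 8-bit add, then sext
  int16_t opAfterExtend = (int16_t)a + (int16_t)s;  // sext, then exact add
  return extendAfterOp == opAfterExtend;
}

int main() {
  printf("%d\n", incrementIsNSW(100, 20)); // 1: 120 fits in i8
  printf("%d\n", incrementIsNSW(100, 30)); // 0: 130 wraps to -126
}
```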
-  if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
+  if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
     Phi->replaceAllUsesWith(V);
     DeadInsts.push_back(Phi);
     ++NumElim;
@@ -1806,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
   return NumElim;
 }

+bool SCEVExpander::isHighCostExpansionHelper(
+    const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+  if (!Processed.insert(S).second)
+    return false;
+
+  if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+    // If the divisor is a power of two and the SCEV type fits in a native
+    // integer, consider the division cheap irrespective of whether it occurs in
+    // the user code since it can be lowered into a right shift.
+    if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+      if (SC->getValue()->getValue().isPowerOf2()) {
+        const DataLayout &DL =
+            L->getHeader()->getParent()->getParent()->getDataLayout();
+        unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+        return DL.isIllegalInteger(Width);
+      }
+
+    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+    // HowManyLessThans produced to compute a precise expression, rather than a
+    // UDiv from the user's code. If we can't find a UDiv in the code with some
+    // simple searching, assume the former and consider UDivExpr expensive to
+    // compute.
+    BasicBlock *ExitingBB = L->getExitingBlock();
+    if (!ExitingBB)
+      return true;
+
+    BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+    if (!ExitingBI || !ExitingBI->isConditional())
+      return true;
+
+    ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+    if (!OrigCond)
+      return true;
+
+    const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+    RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+    if (RHS != S) {
+      const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+      LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+      if (LHS != S)
+        return true;
+    }
+  }
+
+  // Recurse past add expressions, which commonly occur in the
+  // BackedgeTakenCount. They may already exist in program code, and if not,
+  // they are not too expensive to rematerialize.
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I) {
+      if (isHighCostExpansionHelper(*I, L, Processed))
+        return true;
+    }
+    return false;
+  }
+
+  // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+  // the exit condition.
+  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+    return true;
+
+  // If we haven't recognized an expensive SCEV pattern, assume it's an
+  // expression produced by program code.
+  return false;
+}
+
 namespace {
 // Search for a SCEV subexpression that is not safe to expand. Any expression
 // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index c6ea3af..02f8b0b 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -80,7 +80,7 @@ public:
     initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
   }

-  void initializePass() override { InitializeAliasAnalysis(this); }
+  bool doInitialization(Module &M) override;

   /// getAdjustedAnalysisPointer - This method is used when a pass implements
   /// an analysis interface through multiple inheritance. If needed, it
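Aside: the first screen in the new isHighCostExpansionHelper treats a udiv by a constant power of two as cheap because it lowers to a right shift; only an illegal integer width makes it expensive. The shift equivalence in plain C++:

```cpp
#include <cstdint>
#include <cstdio>

static bool isPowerOf2(uint64_t v) { return v != 0 && (v & (v - 1)) == 0; }

int main() {
  uint64_t n = 1000, d = 8;
  if (isPowerOf2(d)) {
    unsigned shift = __builtin_ctzll(d); // log2(d); GCC/Clang builtin
    // n / d and n >> shift are the same unsigned division:
    printf("%llu %llu\n", (unsigned long long)(n / d),
           (unsigned long long)(n >> shift)); // 125 125
  }
}
```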
@@ -119,6 +119,11 @@ ImmutablePass *llvm::createScopedNoAliasAAPass() {
   return new ScopedNoAliasAA();
 }

+bool ScopedNoAliasAA::doInitialization(Module &M) {
+  InitializeAliasAnalysis(this, &M.getDataLayout());
+  return true;
+}
+
 void ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..635c50c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -0,0 +1,603 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+    "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+    cl::init(TargetLibraryInfoImpl::NoLibrary),
+    cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+                          "No vector functions library"),
+               clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+                          "Accelerate framework"),
+               clEnumValEnd));
+
+const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+#define TLI_DEFINE_STRING
+#include "llvm/Analysis/TargetLibraryInfo.def"
+};
+
+static bool hasSinCosPiStret(const Triple &T) {
+  // Only Darwin variants have _stret versions of combined trig functions.
+  if (!T.isOSDarwin())
+    return false;
+
+  // The ABI is rather complicated on x86, so don't do anything special there.
+  if (T.getArch() == Triple::x86)
+    return false;
+
+  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+    return false;
+
+  if (T.isiOS() && T.isOSVersionLT(7, 0))
+    return false;
+
+  return true;
+}
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
+                       const char *const *StandardNames) {
+#ifndef NDEBUG
+  // Verify that the StandardNames array is in alphabetical order.
+  for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+    if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+      llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
+  }
+#endif // !NDEBUG
+
+  // There are no library implementations of memcpy and memset for AMD GPUs and
+  // these can be difficult to lower in the backend.
+  if (T.getArch() == Triple::r600 ||
+      T.getArch() == Triple::amdgcn) {
+    TLI.setUnavailable(LibFunc::memcpy);
+    TLI.setUnavailable(LibFunc::memset);
+    TLI.setUnavailable(LibFunc::memset_pattern16);
+    return;
+  }
+
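Aside: a usage sketch for this new file: build the availability table for a triple and query it. Written against the LibFunc:: spelling of this era (later LLVM renamed the enum); treat the exact includes and constructor shape as assumptions:

```cpp
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Windows masks out long-double and POSIX entry points in initialize();
  // Darwin keeps memset_pattern16.
  TargetLibraryInfoImpl WinImpl(Triple("x86_64-pc-windows-msvc"));
  TargetLibraryInfoImpl MacImpl(Triple("x86_64-apple-macosx10.10.0"));
  TargetLibraryInfo WinTLI(WinImpl), MacTLI(MacImpl);

  outs() << "win has acosl: " << WinTLI.has(LibFunc::acosl) << "\n"; // 0
  outs() << "mac has memset_pattern16: "
         << MacTLI.has(LibFunc::memset_pattern16) << "\n";           // 1
}
```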
+  // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+  if (T.isMacOSX()) {
+    if (T.isMacOSXVersionLT(10, 5))
+      TLI.setUnavailable(LibFunc::memset_pattern16);
+  } else if (T.isiOS()) {
+    if (T.isOSVersionLT(3, 0))
+      TLI.setUnavailable(LibFunc::memset_pattern16);
+  } else {
+    TLI.setUnavailable(LibFunc::memset_pattern16);
+  }
+
+  if (!hasSinCosPiStret(T)) {
+    TLI.setUnavailable(LibFunc::sinpi);
+    TLI.setUnavailable(LibFunc::sinpif);
+    TLI.setUnavailable(LibFunc::cospi);
+    TLI.setUnavailable(LibFunc::cospif);
+    TLI.setUnavailable(LibFunc::sincospi_stret);
+    TLI.setUnavailable(LibFunc::sincospif_stret);
+  }
+
+  if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+      !T.isMacOSXVersionLT(10, 7)) {
+    // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+    // we don't care about) have two versions; on recent OSX, the one we want
+    // has a $UNIX2003 suffix. The two implementations are identical except
+    // for the return value in some edge cases.  However, we don't want to
+    // generate code that depends on the old symbols.
+    TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+    TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+  }
+
+  // iprintf and friends are only available on XCore and TCE.
+  if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+    TLI.setUnavailable(LibFunc::iprintf);
+    TLI.setUnavailable(LibFunc::siprintf);
+    TLI.setUnavailable(LibFunc::fiprintf);
+  }
+
+  if (T.isOSWindows() && !T.isOSCygMing()) {
+    // Win32 does not support long double
+    TLI.setUnavailable(LibFunc::acosl);
+    TLI.setUnavailable(LibFunc::asinl);
+    TLI.setUnavailable(LibFunc::atanl);
+    TLI.setUnavailable(LibFunc::atan2l);
+    TLI.setUnavailable(LibFunc::ceill);
+    TLI.setUnavailable(LibFunc::copysignl);
+    TLI.setUnavailable(LibFunc::cosl);
+    TLI.setUnavailable(LibFunc::coshl);
+    TLI.setUnavailable(LibFunc::expl);
+    TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
+    TLI.setUnavailable(LibFunc::fabsl);
+    TLI.setUnavailable(LibFunc::floorl);
+    TLI.setUnavailable(LibFunc::fmaxl);
+    TLI.setUnavailable(LibFunc::fminl);
+    TLI.setUnavailable(LibFunc::fmodl);
+    TLI.setUnavailable(LibFunc::frexpl);
+    TLI.setUnavailable(LibFunc::ldexpf);
+    TLI.setUnavailable(LibFunc::ldexpl);
+    TLI.setUnavailable(LibFunc::logl);
+    TLI.setUnavailable(LibFunc::modfl);
+    TLI.setUnavailable(LibFunc::powl);
+    TLI.setUnavailable(LibFunc::sinl);
+    TLI.setUnavailable(LibFunc::sinhl);
+    TLI.setUnavailable(LibFunc::sqrtl);
+    TLI.setUnavailable(LibFunc::tanl);
+    TLI.setUnavailable(LibFunc::tanhl);
+
+    // Win32 only has C89 math
+    TLI.setUnavailable(LibFunc::acosh);
+    TLI.setUnavailable(LibFunc::acoshf);
+    TLI.setUnavailable(LibFunc::acoshl);
+    TLI.setUnavailable(LibFunc::asinh);
+    TLI.setUnavailable(LibFunc::asinhf);
+    TLI.setUnavailable(LibFunc::asinhl);
+    TLI.setUnavailable(LibFunc::atanh);
+    TLI.setUnavailable(LibFunc::atanhf);
+    TLI.setUnavailable(LibFunc::atanhl);
+    TLI.setUnavailable(LibFunc::cbrt);
+    TLI.setUnavailable(LibFunc::cbrtf);
+    TLI.setUnavailable(LibFunc::cbrtl);
+    TLI.setUnavailable(LibFunc::exp2);
+    TLI.setUnavailable(LibFunc::exp2f);
+    TLI.setUnavailable(LibFunc::exp2l);
+    TLI.setUnavailable(LibFunc::expm1);
+    TLI.setUnavailable(LibFunc::expm1f);
+    TLI.setUnavailable(LibFunc::expm1l);
+    TLI.setUnavailable(LibFunc::log2);
+    TLI.setUnavailable(LibFunc::log2f);
+    TLI.setUnavailable(LibFunc::log2l);
+    TLI.setUnavailable(LibFunc::log1p);
+    TLI.setUnavailable(LibFunc::log1pf);
+    TLI.setUnavailable(LibFunc::log1pl);
+    TLI.setUnavailable(LibFunc::logb);
+    TLI.setUnavailable(LibFunc::logbf);
+    TLI.setUnavailable(LibFunc::logbl);
+    TLI.setUnavailable(LibFunc::nearbyint);
+    TLI.setUnavailable(LibFunc::nearbyintf);
+    TLI.setUnavailable(LibFunc::nearbyintl);
+    TLI.setUnavailable(LibFunc::rint);
+    TLI.setUnavailable(LibFunc::rintf);
+    TLI.setUnavailable(LibFunc::rintl);
+    TLI.setUnavailable(LibFunc::round);
+    TLI.setUnavailable(LibFunc::roundf);
+    TLI.setUnavailable(LibFunc::roundl);
+    TLI.setUnavailable(LibFunc::trunc);
+    TLI.setUnavailable(LibFunc::truncf);
+    TLI.setUnavailable(LibFunc::truncl);
+
+    // Win32 provides some C99 math with mangled names
+    TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+    if (T.getArch() == Triple::x86) {
+      // Win32 on x86 implements single-precision math functions as macros
+      TLI.setUnavailable(LibFunc::acosf);
+      TLI.setUnavailable(LibFunc::asinf);
+      TLI.setUnavailable(LibFunc::atanf);
+      TLI.setUnavailable(LibFunc::atan2f);
+      TLI.setUnavailable(LibFunc::ceilf);
+      TLI.setUnavailable(LibFunc::copysignf);
+      TLI.setUnavailable(LibFunc::cosf);
+      TLI.setUnavailable(LibFunc::coshf);
+      TLI.setUnavailable(LibFunc::expf);
+      TLI.setUnavailable(LibFunc::floorf);
+      TLI.setUnavailable(LibFunc::fminf);
+      TLI.setUnavailable(LibFunc::fmaxf);
+      TLI.setUnavailable(LibFunc::fmodf);
+      TLI.setUnavailable(LibFunc::logf);
+      TLI.setUnavailable(LibFunc::powf);
+      TLI.setUnavailable(LibFunc::sinf);
+      TLI.setUnavailable(LibFunc::sinhf);
+      TLI.setUnavailable(LibFunc::sqrtf);
+      TLI.setUnavailable(LibFunc::tanf);
+      TLI.setUnavailable(LibFunc::tanhf);
+    }
+
+    // Win32 does *not* provide these functions, but they are
+    // generally available on POSIX-compliant systems:
+    TLI.setUnavailable(LibFunc::access);
+    TLI.setUnavailable(LibFunc::bcmp);
+    TLI.setUnavailable(LibFunc::bcopy);
+    TLI.setUnavailable(LibFunc::bzero);
+    TLI.setUnavailable(LibFunc::chmod);
+    TLI.setUnavailable(LibFunc::chown);
+    TLI.setUnavailable(LibFunc::closedir);
+    TLI.setUnavailable(LibFunc::ctermid);
+    TLI.setUnavailable(LibFunc::fdopen);
+    TLI.setUnavailable(LibFunc::ffs);
+    TLI.setUnavailable(LibFunc::fileno);
+    TLI.setUnavailable(LibFunc::flockfile);
+    TLI.setUnavailable(LibFunc::fseeko);
+    TLI.setUnavailable(LibFunc::fstat);
+    TLI.setUnavailable(LibFunc::fstatvfs);
+    TLI.setUnavailable(LibFunc::ftello);
+    TLI.setUnavailable(LibFunc::ftrylockfile);
+    TLI.setUnavailable(LibFunc::funlockfile);
+    TLI.setUnavailable(LibFunc::getc_unlocked);
+    TLI.setUnavailable(LibFunc::getitimer);
+    TLI.setUnavailable(LibFunc::getlogin_r);
+    TLI.setUnavailable(LibFunc::getpwnam);
+    TLI.setUnavailable(LibFunc::gettimeofday);
+    TLI.setUnavailable(LibFunc::htonl);
+    TLI.setUnavailable(LibFunc::htons);
+    TLI.setUnavailable(LibFunc::lchown);
+    TLI.setUnavailable(LibFunc::lstat);
+    TLI.setUnavailable(LibFunc::memccpy);
+    TLI.setUnavailable(LibFunc::mkdir);
+    TLI.setUnavailable(LibFunc::ntohl);
+    TLI.setUnavailable(LibFunc::ntohs);
+    TLI.setUnavailable(LibFunc::open);
+    TLI.setUnavailable(LibFunc::opendir);
+    TLI.setUnavailable(LibFunc::pclose);
+    TLI.setUnavailable(LibFunc::popen);
+    TLI.setUnavailable(LibFunc::pread);
+    TLI.setUnavailable(LibFunc::pwrite);
+    TLI.setUnavailable(LibFunc::read);
+    TLI.setUnavailable(LibFunc::readlink);
+    TLI.setUnavailable(LibFunc::realpath);
+    TLI.setUnavailable(LibFunc::rmdir);
+    TLI.setUnavailable(LibFunc::setitimer);
+    TLI.setUnavailable(LibFunc::stat);
+    TLI.setUnavailable(LibFunc::statvfs);
+    TLI.setUnavailable(LibFunc::stpcpy);
+    TLI.setUnavailable(LibFunc::stpncpy);
+    TLI.setUnavailable(LibFunc::strcasecmp);
+    TLI.setUnavailable(LibFunc::strncasecmp);
+    TLI.setUnavailable(LibFunc::times);
+    TLI.setUnavailable(LibFunc::uname);
+    TLI.setUnavailable(LibFunc::unlink);
+    TLI.setUnavailable(LibFunc::unsetenv);
+    TLI.setUnavailable(LibFunc::utime);
+    TLI.setUnavailable(LibFunc::utimes);
+    TLI.setUnavailable(LibFunc::write);
+
+    // Win32 does *not* provide these functions, but they are
+    // specified by C99:
+    TLI.setUnavailable(LibFunc::atoll);
+    TLI.setUnavailable(LibFunc::frexpf);
+    TLI.setUnavailable(LibFunc::llabs);
+  }
+
+  switch (T.getOS()) {
+  case Triple::MacOSX:
+    // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
+    // and their names are __exp10 and __exp10f. exp10l is not available on
+    // OS X or iOS.
+    TLI.setUnavailable(LibFunc::exp10l);
+    if (T.isMacOSXVersionLT(10, 9)) {
+      TLI.setUnavailable(LibFunc::exp10);
+      TLI.setUnavailable(LibFunc::exp10f);
+    } else {
+      TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+      TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+    }
+    break;
+  case Triple::IOS:
+    TLI.setUnavailable(LibFunc::exp10l);
+    if (T.isOSVersionLT(7, 0)) {
+      TLI.setUnavailable(LibFunc::exp10);
+      TLI.setUnavailable(LibFunc::exp10f);
+    } else {
+      TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+      TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+    }
+    break;
+  case Triple::Linux:
+    // exp10, exp10f and exp10l are available on Linux (GLIBC) but are extremely
+    // buggy prior to glibc version 2.18. Until this version is widely deployed
+    // or we have a reasonable detection strategy, we cannot use exp10 reliably
+    // on Linux.
+    //
+    // Fall through to disable all of them.
+  default:
+    TLI.setUnavailable(LibFunc::exp10);
+    TLI.setUnavailable(LibFunc::exp10f);
+    TLI.setUnavailable(LibFunc::exp10l);
+  }
+
+  // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+  // Linux (GLIBC):
+  // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+  // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+  // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+  switch (T.getOS()) {
+  case Triple::Darwin:
+  case Triple::MacOSX:
+  case Triple::IOS:
+  case Triple::FreeBSD:
+  case Triple::Linux:
+    break;
+  default:
+    TLI.setUnavailable(LibFunc::ffsl);
+  }
+
+  // ffsll is available on at least FreeBSD and Linux (GLIBC):
+  // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+  // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+  switch (T.getOS()) {
+  case Triple::FreeBSD:
+  case Triple::Linux:
+    break;
+  default:
+    TLI.setUnavailable(LibFunc::ffsll);
+  }
+
+  // The following functions are available on at least Linux:
+  if (!T.isOSLinux()) {
+    TLI.setUnavailable(LibFunc::dunder_strdup);
+    TLI.setUnavailable(LibFunc::dunder_strtok_r);
+    TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+    TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+    TLI.setUnavailable(LibFunc::under_IO_getc);
+    TLI.setUnavailable(LibFunc::under_IO_putc);
+    TLI.setUnavailable(LibFunc::memalign);
+    TLI.setUnavailable(LibFunc::fopen64);
+    TLI.setUnavailable(LibFunc::fseeko64);
+    TLI.setUnavailable(LibFunc::fstat64);
+    TLI.setUnavailable(LibFunc::fstatvfs64);
+    TLI.setUnavailable(LibFunc::ftello64);
+    TLI.setUnavailable(LibFunc::lstat64);
+    TLI.setUnavailable(LibFunc::open64);
+    TLI.setUnavailable(LibFunc::stat64);
+    TLI.setUnavailable(LibFunc::statvfs64);
+    TLI.setUnavailable(LibFunc::tmpfile64);
+  }
+
+  TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
+ // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, Triple(), StandardNames); +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { + // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, T, StandardNames); +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) + : CustomNames(TLI.CustomNames) { + memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; +} + +TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) + : CustomNames(std::move(TLI.CustomNames)) { + std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), + AvailableArray); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; +} + +TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { + CustomNames = TLI.CustomNames; + memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + return *this; +} + +TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) { + CustomNames = std::move(TLI.CustomNames); + std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), + AvailableArray); + return *this; +} + +static StringRef sanitizeFunctionName(StringRef funcName) { + // Filter out empty names and names containing null bytes, those can't be in + // our table. + if (funcName.empty() || funcName.find('\0') != StringRef::npos) + return StringRef(); + + // Check for \01 prefix that is used to mangle __asm declarations and + // strip it if present. + return GlobalValue::getRealLinkageName(funcName); +} + +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char *const *Start = &StandardNames[0]; + const char *const *End = &StandardNames[LibFunc::NumLibFuncs]; + + funcName = sanitizeFunctionName(funcName); + if (funcName.empty()) + return false; + + const char *const *I = std::lower_bound( + Start, End, funcName, [](const char *LHS, StringRef RHS) { + return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; + }); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} + +void TargetLibraryInfoImpl::disableAllFunctions() { + memset(AvailableArray, 0, sizeof(AvailableArray)); +} + +static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) { + return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName, + std::strlen(RHS.ScalarFnName)) < 0; +} + +static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) { + return std::strncmp(LHS.VectorFnName, RHS.VectorFnName, + std::strlen(RHS.VectorFnName)) < 0; +} + +static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) { + return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0; +} + +static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) { + return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0; +} + +void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) { + VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end()); + std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName); + + ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end()); + std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); +} + +void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( + enum VectorLibrary VecLib) { + switch 
(VecLib) { + case Accelerate: { + const VecDesc VecFuncs[] = { + // Floating-Point Arithmetic and Auxiliary Functions + {"ceilf", "vceilf", 4}, + {"fabsf", "vfabsf", 4}, + {"llvm.fabs.f32", "vfabsf", 4}, + {"floorf", "vfloorf", 4}, + {"sqrtf", "vsqrtf", 4}, + {"llvm.sqrt.f32", "vsqrtf", 4}, + + // Exponential and Logarithmic Functions + {"expf", "vexpf", 4}, + {"llvm.exp.f32", "vexpf", 4}, + {"expm1f", "vexpm1f", 4}, + {"logf", "vlogf", 4}, + {"llvm.log.f32", "vlogf", 4}, + {"log1pf", "vlog1pf", 4}, + {"log10f", "vlog10f", 4}, + {"llvm.log10.f32", "vlog10f", 4}, + {"logbf", "vlogbf", 4}, + + // Trigonometric Functions + {"sinf", "vsinf", 4}, + {"llvm.sin.f32", "vsinf", 4}, + {"cosf", "vcosf", 4}, + {"llvm.cos.f32", "vcosf", 4}, + {"tanf", "vtanf", 4}, + {"asinf", "vasinf", 4}, + {"acosf", "vacosf", 4}, + {"atanf", "vatanf", 4}, + + // Hyperbolic Functions + {"sinhf", "vsinhf", 4}, + {"coshf", "vcoshf", 4}, + {"tanhf", "vtanhf", 4}, + {"asinhf", "vasinhf", 4}, + {"acoshf", "vacoshf", 4}, + {"atanhf", "vatanhf", 4}, + }; + addVectorizableFunctions(VecFuncs); + break; + } + case NoLibrary: + break; + } +} + +bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { + funcName = sanitizeFunctionName(funcName); + if (funcName.empty()) + return false; + + std::vector<VecDesc>::const_iterator I = std::lower_bound( + VectorDescs.begin(), VectorDescs.end(), funcName, + compareWithScalarFnName); + return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; +} + +StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, + unsigned VF) const { + F = sanitizeFunctionName(F); + if (F.empty()) + return F; + std::vector<VecDesc>::const_iterator I = std::lower_bound( + VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName); + while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { + if (I->VectorizationFactor == VF) + return I->VectorFnName; + ++I; + } + return StringRef(); +} + +StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, + unsigned &VF) const { + F = sanitizeFunctionName(F); + if (F.empty()) + return F; + + std::vector<VecDesc>::const_iterator I = std::lower_bound( + ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName); + if (I == VectorDescs.end() || StringRef(I->VectorFnName) != F) + return StringRef(); + VF = I->VectorizationFactor; + return I->ScalarFnName; +} + +TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) { + if (PresetInfoImpl) + return TargetLibraryInfo(*PresetInfoImpl); + + return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); +} + +TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) { + if (PresetInfoImpl) + return TargetLibraryInfo(*PresetInfoImpl); + + return TargetLibraryInfo( + lookupInfoImpl(Triple(F.getParent()->getTargetTriple()))); +} + +TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) { + std::unique_ptr<TargetLibraryInfoImpl> &Impl = + Impls[T.normalize()]; + if (!Impl) + Impl.reset(new TargetLibraryInfoImpl(T)); + + return *Impl; +} + + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() + : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T) + : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass( + const 
TargetLibraryInfoImpl &TLIImpl) + : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) { + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char TargetLibraryAnalysis::PassID; + +// Register the basic pass. +INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo", + "Target Library Information", false, true) +char TargetLibraryInfoWrapperPass::ID = 0; + +void TargetLibraryInfoWrapperPass::anchor() {} diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index ef3909b..e1744d1 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -8,11 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" @@ -20,637 +22,303 @@ using namespace llvm; #define DEBUG_TYPE "tti" -// Setup the analysis group to manage the TargetTransformInfo passes. -INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) -char TargetTransformInfo::ID = 0; - -TargetTransformInfo::~TargetTransformInfo() { +namespace { +/// \brief No-op implementation of the TTI interface using the utility base +/// classes. +/// +/// This is used when no target specific information is available. +struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { + explicit NoTTIImpl(const DataLayout *DL) + : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} +}; } -void TargetTransformInfo::pushTTIStack(Pass *P) { - TopTTI = this; - PrevTTI = &P->getAnalysis<TargetTransformInfo>(); +TargetTransformInfo::TargetTransformInfo(const DataLayout *DL) + : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} - // Walk up the chain and update the top TTI pointer. 
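Aside: the rewritten TargetTransformInfo replaces the old analysis-group chaining (PrevTTI/TopTTI, removed below) with a type-erased Concept/Model pair stored in TTIImpl. A plain-C++ sketch of that pattern, with NoTTIImpl standing in for the real no-op implementation:

```cpp
#include <cstdio>
#include <memory>

class TTI {
  // Abstract interface the wrapper forwards to.
  struct Concept {
    virtual ~Concept() = default;
    virtual unsigned getOperationCost(unsigned Opcode) = 0;
  };
  // Adapter that wraps any concrete implementation type.
  template <typename T> struct Model final : Concept {
    T Impl;
    explicit Model(T I) : Impl(std::move(I)) {}
    unsigned getOperationCost(unsigned Opcode) override {
      return Impl.getOperationCost(Opcode);
    }
  };
  std::unique_ptr<Concept> Impl;

public:
  template <typename T>
  explicit TTI(T I) : Impl(std::make_unique<Model<T>>(std::move(I))) {}
  unsigned getOperationCost(unsigned Opcode) {
    return Impl->getOperationCost(Opcode);
  }
};

struct NoTTIImpl { // no target info available
  unsigned getOperationCost(unsigned) { return 1; } // think TCC_Basic
};

int main() { printf("%u\n", TTI(NoTTIImpl{}).getOperationCost(13)); }
```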
- for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) - PTTI->TopTTI = this; -} +TargetTransformInfo::~TargetTransformInfo() {} -void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetTransformInfo>(); +TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) + : TTIImpl(std::move(Arg.TTIImpl)) {} + +TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { + TTIImpl = std::move(RHS.TTIImpl); + return *this; } unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { - return PrevTTI->getOperationCost(Opcode, Ty, OpTy); -} - -unsigned TargetTransformInfo::getGEPCost( - const Value *Ptr, ArrayRef<const Value *> Operands) const { - return PrevTTI->getGEPCost(Ptr, Operands); + return TTIImpl->getOperationCost(Opcode, Ty, OpTy); } unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { - return PrevTTI->getCallCost(FTy, NumArgs); + return TTIImpl->getCallCost(FTy, NumArgs); } -unsigned TargetTransformInfo::getCallCost(const Function *F, - int NumArgs) const { - return PrevTTI->getCallCost(F, NumArgs); -} - -unsigned TargetTransformInfo::getCallCost( - const Function *F, ArrayRef<const Value *> Arguments) const { - return PrevTTI->getCallCost(F, Arguments); -} - -unsigned TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const { - return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +unsigned +TargetTransformInfo::getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) const { + return TTIImpl->getCallCost(F, Arguments); } -unsigned TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { - return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +unsigned +TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const { + return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); } unsigned TargetTransformInfo::getUserCost(const User *U) const { - return PrevTTI->getUserCost(U); + return TTIImpl->getUserCost(U); } bool TargetTransformInfo::hasBranchDivergence() const { - return PrevTTI->hasBranchDivergence(); + return TTIImpl->hasBranchDivergence(); +} + +bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { + return TTIImpl->isSourceOfDivergence(V); } bool TargetTransformInfo::isLoweredToCall(const Function *F) const { - return PrevTTI->isLoweredToCall(F); + return TTIImpl->isLoweredToCall(F); } -void -TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { - PrevTTI->getUnrollingPreferences(F, L, UP); +void TargetTransformInfo::getUnrollingPreferences( + Loop *L, UnrollingPreferences &UP) const { + return TTIImpl->getUnrollingPreferences(L, UP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { - return PrevTTI->isLegalAddImmediate(Imm); + return TTIImpl->isLegalAddImmediate(Imm); } bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { - return PrevTTI->isLegalICmpImmediate(Imm); + return TTIImpl->isLegalICmpImmediate(Imm); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, - int Consecutive) const { - return false; +bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); } bool 
TargetTransformInfo::isLegalMaskedStore(Type *DataType, int Consecutive) const { - return false; + return TTIImpl->isLegalMaskedStore(DataType, Consecutive); } - -bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, - bool HasBaseReg, - int64_t Scale) const { - return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + int Consecutive) const { + return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const { - return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { - return PrevTTI->isTruncateFree(Ty1, Ty2); + return TTIImpl->isTruncateFree(Ty1, Ty2); +} + +bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { + return TTIImpl->isProfitableToHoist(I); } bool TargetTransformInfo::isTypeLegal(Type *Ty) const { - return PrevTTI->isTypeLegal(Ty); + return TTIImpl->isTypeLegal(Ty); } unsigned TargetTransformInfo::getJumpBufAlignment() const { - return PrevTTI->getJumpBufAlignment(); + return TTIImpl->getJumpBufAlignment(); } unsigned TargetTransformInfo::getJumpBufSize() const { - return PrevTTI->getJumpBufSize(); + return TTIImpl->getJumpBufSize(); } bool TargetTransformInfo::shouldBuildLookupTables() const { - return PrevTTI->shouldBuildLookupTables(); + return TTIImpl->shouldBuildLookupTables(); +} + +bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { + return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { - return PrevTTI->getPopcntSupport(IntTyWidthInBit); + return TTIImpl->getPopcntSupport(IntTyWidthInBit); } bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { - return PrevTTI->haveFastSqrt(Ty); + return TTIImpl->haveFastSqrt(Ty); +} + +unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { + return TTIImpl->getFPOpCost(Ty); } unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(Imm, Ty); + return TTIImpl->getIntImmCost(Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx, +unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty); + return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); } unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { - return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty); + return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return PrevTTI->getNumberOfRegisters(Vector); + return TTIImpl->getNumberOfRegisters(Vector); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { - return PrevTTI->getRegisterBitWidth(Vector); + return TTIImpl->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getMaxInterleaveFactor() const { - return PrevTTI->getMaxInterleaveFactor(); +unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { + return TTIImpl->getMaxInterleaveFactor(VF); } unsigned 
TargetTransformInfo::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Op1Info, - OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) const { - return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, +unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, Type *SubTp) const { - return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp); + return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); } unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { - return PrevTTI->getCastInstrCost(Opcode, Dst, Src); + return TTIImpl->getCastInstrCost(Opcode, Dst, Src); } unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - return PrevTTI->getCFInstrCost(Opcode); + return TTIImpl->getCFInstrCost(Opcode); } unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { - return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); + return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); } unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { - return PrevTTI->getVectorInstrCost(Opcode, Val, Index); + return TTIImpl->getVectorInstrCost(Opcode, Val, Index); } unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, - Type *RetTy, +TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +} + +unsigned +TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys) const { - return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys); + return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); +} + +unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) const { + return TTIImpl->getCallInstrCost(F, RetTy, Tys); } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { - return PrevTTI->getNumberOfParts(Tp); + return TTIImpl->getNumberOfParts(Tp); } unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, bool IsComplex) const { - return PrevTTI->getAddressComputationCost(Tp, IsComplex); + return TTIImpl->getAddressComputationCost(Tp, IsComplex); } unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const { - return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise); + bool IsPairwiseForm) const { + return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); } -unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) - const { - return PrevTTI->getCostOfKeepingLiveOverCall(Tys); +unsigned +TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + return TTIImpl->getCostOfKeepingLiveOverCall(Tys); } -namespace { +bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst 
*Inst, + MemIntrinsicInfo &Info) const { + return TTIImpl->getTgtMemIntrinsic(Inst, Info); +} -struct NoTTI final : ImmutablePass, TargetTransformInfo { - const DataLayout *DL; - - NoTTI() : ImmutablePass(ID), DL(nullptr) { - initializeNoTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - // Note that this subclass is special, and must *not* call initializeTTI as - // it does not chain. - TopTTI = this; - PrevTTI = nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // Note that this subclass is special, and must *not* call - // TTI::getAnalysisUsage as it breaks the recursion. - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - unsigned getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const override { - switch (Opcode) { - default: - // By default, just classify everything as 'basic'. - return TCC_Basic; - - case Instruction::GetElementPtr: - llvm_unreachable("Use getGEPCost for GEP operations!"); - - case Instruction::BitCast: - assert(OpTy && "Cast instructions must provide the operand type"); - if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) - // Identity and pointer-to-pointer casts are free. - return TCC_Free; - - // Otherwise, the default basic cost is used. - return TCC_Basic; - - case Instruction::IntToPtr: { - if (!DL) - return TCC_Basic; - - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - unsigned OpSize = OpTy->getScalarSizeInBits(); - if (DL->isLegalInteger(OpSize) && - OpSize <= DL->getPointerTypeSizeInBits(Ty)) - return TCC_Free; - - // Otherwise it's not a no-op. - return TCC_Basic; - } - case Instruction::PtrToInt: { - if (!DL) - return TCC_Basic; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - unsigned DestSize = Ty->getScalarSizeInBits(); - if (DL->isLegalInteger(DestSize) && - DestSize >= DL->getPointerTypeSizeInBits(OpTy)) - return TCC_Free; - - // Otherwise it's not a no-op. - return TCC_Basic; - } - case Instruction::Trunc: - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) - return TCC_Free; - - return TCC_Basic; - } - } - - unsigned getGEPCost(const Value *Ptr, - ArrayRef<const Value *> Operands) const override { - // In the basic model, we just assume that all-constant GEPs will be folded - // into their uses via addressing modes. - for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) - if (!isa<Constant>(Operands[Idx])) - return TCC_Basic; - - return TCC_Free; - } - - unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override - { - assert(FTy && "FunctionType must be provided to this routine."); - - // The target-independent implementation just measures the size of the - // function by approximating that each argument will take on average one - // instruction to prepare. - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. 
- NumArgs = FTy->getNumParams(); - - return TCC_Basic * (NumArgs + 1); - } - - unsigned getCallCost(const Function *F, int NumArgs = -1) const override - { - assert(F && "A concrete function must be provided to this routine."); - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. - NumArgs = F->arg_size(); - - if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { - FunctionType *FTy = F->getFunctionType(); - SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); - return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); - } - - if (!TopTTI->isLoweredToCall(F)) - return TCC_Basic; // Give a basic cost if it will be lowered directly. - - return TopTTI->getCallCost(F->getFunctionType(), NumArgs); - } - - unsigned getCallCost(const Function *F, - ArrayRef<const Value *> Arguments) const override { - // Simply delegate to generic handling of the call. - // FIXME: We should use instsimplify or something else to catch calls which - // will constant fold with these arguments. - return TopTTI->getCallCost(F, Arguments.size()); - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> ParamTys) const override { - switch (IID) { - default: - // Intrinsics rarely (if ever) have normal argument setup constraints. - // Model them as having a basic instruction cost. - // FIXME: This is wrong for libc intrinsics. - return TCC_Basic; - - case Intrinsic::annotation: - case Intrinsic::assume: - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: - case Intrinsic::experimental_gc_relocate: - // These intrinsics don't actually represent code after lowering. - return TCC_Free; - } - } - - unsigned - getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<const Value *> Arguments) const override { - // Delegate to the generic intrinsic handling code. This mostly provides an - // opportunity for targets to (for example) special case the cost of - // certain intrinsics based on constants used as arguments. - SmallVector<Type *, 8> ParamTys; - ParamTys.reserve(Arguments.size()); - for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) - ParamTys.push_back(Arguments[Idx]->getType()); - return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); - } - - unsigned getUserCost(const User *U) const override { - if (isa<PHINode>(U)) - return TCC_Free; // Model all PHI nodes as free. - - if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { - SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end()); - return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices); - } - - if (ImmutableCallSite CS = U) { - const Function *F = CS.getCalledFunction(); - if (!F) { - // Just use the called value type. 
- Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); - return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); - } - - SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); - return TopTTI->getCallCost(F, Arguments); - } - - if (const CastInst *CI = dyn_cast<CastInst>(U)) { - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - return TCC_Free; - } - - // Otherwise delegate to the fully generic implementations. - return getOperationCost(Operator::getOpcode(U), U->getType(), - U->getNumOperands() == 1 ? - U->getOperand(0)->getType() : nullptr); - } - - bool hasBranchDivergence() const override { return false; } - - bool isLoweredToCall(const Function *F) const override { - // FIXME: These should almost certainly not be handled here, and instead - // handled with the help of TLI or the target itself. This was largely - // ported from existing analysis heuristics here so that such refactorings - // can take place in the future. - - if (F->isIntrinsic()) - return false; - - if (F->hasLocalLinkage() || !F->hasName()) - return true; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || - Name == "fmin" || Name == "fminf" || Name == "fminl" || - Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || - Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || - Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") - return false; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || - Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == - "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || - Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") - return false; - - return true; - } - - void getUnrollingPreferences(const Function *, Loop *, - UnrollingPreferences &) const override {} - - bool isLegalAddImmediate(int64_t Imm) const override { - return false; - } - - bool isLegalICmpImmediate(int64_t Imm) const override { - return false; - } - - bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const override - { - // Guess that reg+reg addressing is allowed. This heuristic is taken from - // the implementation of LSR. - return !BaseGV && BaseOffset == 0 && Scale <= 1; - } - - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) const override { - // Guess that all legal addressing mode are free. 
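Aside: the removed NoTTI default nearby guesses that only reg+reg style addressing is legal, the same conservative screen LSR uses; a tiny restatement of that predicate:

```cpp
#include <cstdint>
#include <cstdio>

// Legal iff there is no global base, a zero constant offset, and a scale
// of at most one, i.e. plain base+index addressing.
static bool isLegalAddressingMode(bool hasBaseGV, int64_t baseOffset,
                                  int64_t scale) {
  return !hasBaseGV && baseOffset == 0 && scale <= 1;
}

int main() {
  printf("%d\n", isLegalAddressingMode(false, 0, 1));  // 1: reg+reg
  printf("%d\n", isLegalAddressingMode(false, 16, 2)); // 0: needs target info
}
```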
- if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) - return 0; - return -1; - } - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override { - return false; - } - - bool isTypeLegal(Type *Ty) const override { - return false; - } - - unsigned getJumpBufAlignment() const override { - return 0; - } - - unsigned getJumpBufSize() const override { - return 0; - } - - bool shouldBuildLookupTables() const override { - return true; - } - - PopcntSupportKind - getPopcntSupport(unsigned IntTyWidthInBit) const override { - return PSK_Software; - } - - bool haveFastSqrt(Type *Ty) const override { - return false; - } - - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override { - return TCC_Basic; - } - - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const override { - return TCC_Free; - } - - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const override { - return TCC_Free; - } - - unsigned getNumberOfRegisters(bool Vector) const override { - return 8; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - return 32; - } - - unsigned getMaxInterleaveFactor() const override { - return 1; - } - - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind, OperandValueProperties, - OperandValueProperties) const override { - return 1; - } - - unsigned getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index = 0, Type *SubTp = nullptr) const override { - return 1; - } - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override { - return 1; - } - - unsigned getCFInstrCost(unsigned Opcode) const override { - return 1; - } - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = nullptr) const override { - return 1; - } - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const override { - return 1; - } - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override { - return 1; - } - - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type*> Tys) const override { - return 1; - } - - unsigned getNumberOfParts(Type *Tp) const override { - return 0; - } - - unsigned getAddressComputationCost(Type *Tp, bool) const override { - return 0; - } - - unsigned getReductionCost(unsigned, Type *, bool) const override { - return 1; - } - - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override { - return 0; - } +Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( + IntrinsicInst *Inst, Type *ExpectedType) const { + return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); +} -}; +TargetTransformInfo::Concept::~Concept() {} + +TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} + +TargetIRAnalysis::TargetIRAnalysis( + std::function<Result(Function &)> TTICallback) + : TTICallback(TTICallback) {} -} // end anonymous namespace +TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { + return TTICallback(F); +} + +char TargetIRAnalysis::PassID; + +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { + return Result(&F.getParent()->getDataLayout()); +} -INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti", - "No target information", true, true, true) -char NoTTI::ID = 0; +// Register the basic pass. 
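The new TargetIRAnalysis above replaces the old analysis-group machinery with a stored callback: the analysis keeps a std::function that builds the per-function result, and getDefaultTTI supplies a fallback built from the module's DataLayout alone. A minimal standalone sketch of that pattern, using hypothetical names rather than the LLVM API:

#include <functional>
#include <iostream>
#include <utility>

struct Result { const char *Origin; };

class IRAnalysis {
  std::function<Result(int &)> Callback;            // per-"function" factory
  static Result getDefault(int &) { return {"default"}; }
public:
  IRAnalysis() : Callback(&getDefault) {}           // mirrors the default ctor
  explicit IRAnalysis(std::function<Result(int &)> C)
      : Callback(std::move(C)) {}                   // target-supplied factory
  Result run(int &F) { return Callback(F); }
};

int main() {
  int F = 0;                                        // stand-in for a Function
  std::cout << IRAnalysis().run(F).Origin << "\n";  // prints "default"
  IRAnalysis Custom([](int &) { return Result{"target"}; });
  std::cout << Custom.run(F).Origin << "\n";        // prints "target"
}

The point of the indirection is that a target can install its own factory without subclassing the analysis itself.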
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", + "Target Transform Information", false, true) +char TargetTransformInfoWrapperPass::ID = 0; + +void TargetTransformInfoWrapperPass::anchor() {} + +TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() + : ImmutablePass(ID) { + initializeTargetTransformInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( + TargetIRAnalysis TIRA) + : ImmutablePass(ID), TIRA(std::move(TIRA)) { + initializeTargetTransformInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { + TTI = TIRA.run(F); + return *TTI; +} -ImmutablePass *llvm::createNoTargetTransformInfoPass() { - return new NoTTI(); +ImmutablePass * +llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { + return new TargetTransformInfoWrapperPass(std::move(TIRA)); } diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index ff89558..1158725 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -129,6 +129,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be @@ -282,9 +283,7 @@ namespace { initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - void initializePass() override { - InitializeAliasAnalysis(this); - } + bool doInitialization(Module &M) override; /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it @@ -321,6 +320,11 @@ ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { return new TypeBasedAliasAnalysis(); } +bool TypeBasedAliasAnalysis::doInitialization(Module &M) { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; +} + void TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -575,18 +579,22 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!B) return nullptr; } - SmallVector<MDNode *, 4> PathA; + SmallSetVector<MDNode *, 4> PathA; MDNode *T = A; while (T) { - PathA.push_back(T); + if (PathA.count(T)) + report_fatal_error("Cycle found in TBAA metadata."); + PathA.insert(T); T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : nullptr; } - SmallVector<MDNode *, 4> PathB; + SmallSetVector<MDNode *, 4> PathB; T = B; while (T) { - PathB.push_back(T); + if (PathB.count(T)) + report_fatal_error("Cycle found in TBAA metadata."); + PathB.insert(T); T = T->getNumOperands() >= 2 ? 
cast_or_null<MDNode>(T->getOperand(1)) : nullptr; }
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 5d90917..a55712c 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include <cstring> @@ -39,13 +41,41 @@ using namespace llvm::PatternMatch; const unsigned MaxDepth = 6;
+/// Enable an experimental feature to leverage information about dominating +/// conditions to compute known bits. The individual options below control how +/// hard we search. The defaults are chosen to be fairly aggressive. If you +/// run into compile-time problems when testing, scale them back and report +/// your findings. +static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", + cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value. +// (TODO: evaluate cost vs profit, consider higher thresholds) +static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth", + cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating +/// conditions? +static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", + cl::Hidden, + cl::init(20000));
+
+// Controls the number of uses of the value searched for possible +// dominating comparisons. +static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", + cl::Hidden, cl::init(2000));
+
+// If true, only consider compares whose sole use is a branch. +static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", + cl::Hidden, cl::init(false));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown, returns /// 0). For vector types, returns the element type's bitwidth. -static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return TD ?
TD->getPointerTypeSizeInBits(Ty) : 0; + return DL.getPointerTypeSizeInBits(Ty); } // Many of these functions have internal versions that take an assumption @@ -97,73 +127,88 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { } static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q); + const DataLayout &DL, unsigned Depth, + const Query &Q); void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth, + ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + assert(LHS->getType()->isIntOrIntVectorTy() && + "LHS and RHS should be integers"); + IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType()); + APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); + return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); +} + static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q); + const DataLayout &DL, unsigned Depth, + const Query &Q); void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth, + ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q); + const Query &Q, const DataLayout &DL); -bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, +bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(AC, safeCxtI(V, CxtI), DT)); + Query(AC, safeCxtI(V, CxtI), DT), DL); } -static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q); -bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT)); + return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } -static bool MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth, - const Query &Q); +static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth, const Query &Q); -bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const 
DataLayout *TD, +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::MaskedValueIsZero(V, Mask, TD, Depth, + return ::MaskedValueIsZero(V, Mask, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } -static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, +static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q); -unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, +unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT)); + return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, const Query &Q) { if (!Add) { if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { @@ -175,7 +220,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -194,8 +239,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. APInt CarryIn(BitWidth, 0); @@ -243,11 +288,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth, + const DataLayout &DL, unsigned Depth, const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -268,9 +313,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, // negative or zero. 
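computeKnownBitsAddSub above keeps only those result bits that are forced no matter how the unknown operand bits resolve. One such fact, checked exhaustively for i8 as a standalone sketch: adding the constant 3 to a value whose low two bits are known zero pins the low two bits of the sum to one.

#include <cassert>
#include <cstdint>

int main() {
  // A is only known to have the form xxxxxx00 (low two bits zero).
  for (unsigned A = 0; A < 256; A += 4) {
    uint8_t Sum = uint8_t(A + 3);  // add the constant 3
    assert((Sum & 3) == 3);        // low two bits of the sum are known one
  }
}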
if (!isKnownNonNegative) isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && - isKnownNonZero(Op0, TD, Depth, Q)) || + isKnownNonZero(Op0, DL, Depth, Q)) || (isKnownNegativeOp0 && isKnownNonNegativeOp1 && - isKnownNonZero(Op1, TD, Depth, Q)); + isKnownNonZero(Op1, DL, Depth, Q)); } }
@@ -382,8 +427,7 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { return false; } -static bool isValidAssumeForContext(Value *V, const Query &Q, - const DataLayout *DL) { +static bool isValidAssumeForContext(Value *V, const Query &Q) { Instruction *Inv = cast<Instruction>(V);
// There are two restrictions on the use of an assume: @@ -403,8 +447,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I, DL) && - !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) return false;
return !isEphemeralValueOf(Inv, Q.CxtI); @@ -428,8 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I, DL) && - !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) return false;
return !isEphemeralValueOf(Inv, Q.CxtI); @@ -440,10 +482,9 @@ static bool isValidAssumeForContext(Value *V, const Query &Q, bool llvm::isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, - const DataLayout *DL, const DominatorTree *DT) { - return ::isValidAssumeForContext(const_cast<Instruction*>(I), - Query(nullptr, CxtI, DT), DL); + return ::isValidAssumeForContext(const_cast<Instruction *>(I), + Query(nullptr, CxtI, DT)); }
template<typename LHS, typename RHS> @@ -474,9 +515,181 @@ m_c_Xor(const LHS &L, const RHS &R) { return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); }
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is +/// true (at the context instruction). This is mostly a utility function for +/// the prototype dominating conditions reasoning below. +static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp, + APInt &KnownZero, + APInt &KnownOne, + const DataLayout &DL, + unsigned Depth, const Query &Q) { + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); + // TODO: We could potentially be more aggressive here. This would be worth + // evaluating. If we can, explore commoning this code with the assume + // handling logic. + if (LHS != V && RHS != V) + return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) { + default: + // We know nothing from this condition + break; + // TODO: implement unsigned bound from below (known one bits) + // TODO: common condition check implementations with assumes + // TODO: implement other patterns from assume (e.g. V & B == A) + case ICmpInst::ICMP_SGT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) { + // We know that the sign bit is zero. + KnownZero |= APInt::getSignBit(BitWidth); + } + } + break;
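The ICMP_SGT case rests on a small signed-arithmetic fact: if V >s C and C is either exactly -1 (all bits known one) or has a known-clear sign bit, then V cannot be negative. A standalone exhaustive check of that fact over i8:

#include <cassert>

int main() {
  for (int V = -128; V <= 127; ++V)
    for (int C = -1; C <= 127; ++C)   // C == -1, or C's sign bit is zero
      if (V > C)
        assert(V >= 0);               // so V's sign bit is known zero
}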
+ case ICmpInst::ICMP_EQ: + if (LHS == V) + computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q); + else + llvm_unreachable("missing use?"); + break; + case ICmpInst::ICMP_ULE: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + // The known zero bits carry over + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + case ICmpInst::ICMP_ULT: + if (LHS == V) { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + // Whatever high bits in rhs are zero are known to be zero (if rhs is a + // power of 2, then one more). + unsigned SignBits = KnownZeroTemp.countLeadingOnes(); + if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL)) + SignBits++; + KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); + } + break; + };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along +/// all paths leading to the context instruction. In particular, look for +/// cases where one branch of an interesting condition dominates the context +/// instruction. This does not do general dataflow. +/// NOTE: This code is EXPERIMENTAL and currently off by default. +static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, + APInt &KnownOne, + const DataLayout &DL, + unsigned Depth, + const Query &Q) { + // Need both the dominator tree and the query location to do anything useful + if (!Q.DT || !Q.CxtI) + return; + Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+ // Avoid useless work + if (auto VI = dyn_cast<Instruction>(V)) + if (VI->getParent() == Cxt->getParent()) + return;
+
+ // Note: We currently implement two options. It's not clear which of these + // will survive long term; we need data for that. + // Option 1 - Try walking the dominator tree looking for conditions which + // might apply. This works well for local conditions (loop guards, etc.), + // but not as well for things far from the context instruction (presuming a + // low max blocks explored). If we can set a high enough limit, this would + // be all we need. + // Option 2 - We restrict our search to those conditions which are uses of + // the value we're interested in. This is independent of dom structure, + // but is slightly less powerful without looking through lots of use chains. + // It does handle conditions far from the context instruction (e.g. early + // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree + unsigned NumBlocksExplored = 0; + BasicBlock *Current = Cxt->getParent(); + while (true) { + // Stop searching if we've gone too far up the chain + if (NumBlocksExplored >= DomConditionsMaxDomBlocks) + break; + NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom()) + break; + Current = Q.DT->getNode(Current)->getIDom()->getBlock(); + if (!Current) + // found function entry + break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cmp) + continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context + // instruction.
Finding a condition where one path dominates the context + isn't enough because both the true and false cases could merge before + the context instruction we're actually interested in. Instead, we need + to ensure that the taken *edge* dominates the context instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + BasicBlockEdge Edge(BI->getParent(), BB0); + if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) + continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, + Q); + }
+
+ // Option 2 - Search the other uses of V + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + ICmpInst *Cmp = dyn_cast<ICmpInst>(U); + if (!Cmp) + continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue;
+
+ for (auto *CmpU : Cmp->users()) { + BranchInst *BI = dyn_cast<BranchInst>(CmpU); + if (!BI || BI->isUnconditional()) + continue; + // We're looking for conditions that are guaranteed to hold at the + // context instruction. Finding a condition where one path dominates + // the context isn't enough because both the true and false cases could + // merge before the context instruction we're actually interested in. + // Instead, we need to ensure that the taken *edge* dominates the context + // instruction. + BasicBlock *BB0 = BI->getSuccessor(0); + BasicBlockEdge Edge(BI->getParent(), BB0); + if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) + continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, + Q); + } + } +}
+
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, - APInt &KnownOne, - const DataLayout *DL, + APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! @@ -498,14 +711,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop once for each value queried, resulting in a // runtime of ~O(#assumes * #values).
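Several of the assume patterns handled below move bits through a mask. For the (v & b) == a form, every bit at which b is one forces the matching bit of v to equal the corresponding bit of a, which is exactly what the KnownZero/KnownOne updates encode. A standalone 8-bit check of that fact:

#include <cassert>

int main() {
  for (unsigned V = 0; V < 256; ++V)
    for (unsigned B = 0; B < 256; ++B) {
      unsigned A = V & B;                 // the assumed equality (v & b) == a
      assert((V & (A & B)) == (A & B));   // bits of b set in a: known one in v
      assert((V & (~A & B)) == 0);        // bits of b clear in a: known zero
    }
}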
- assert(isa<IntrinsicInst>(I) && - dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); - if (Arg == V && - isValidAssumeForContext(I, Q, DL)) { + if (Arg == V && isValidAssumeForContext(I, Q)) { assert(BitWidth == 1 && "assume operand is not i1?"); KnownZero.clearAllBits(); KnownOne.setAllBits(); @@ -525,15 +736,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne; // assume(v & b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); @@ -546,7 +757,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); @@ -557,9 +768,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & MaskKnownOne; KnownOne |= RHSKnownZero & MaskKnownOne; // assume(v | b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -572,7 +783,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -583,9 +794,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownZero |= RHSKnownOne & BKnownZero; KnownOne |= RHSKnownZero & BKnownZero; // assume(v ^ b = a) - } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + } else if (match(Arg, + m_c_ICmp(Pred, 
m_c_Xor(m_V, m_Value(B)), m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -601,7 +812,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); @@ -617,7 +828,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -627,7 +838,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -637,10 +848,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, - m_ConstantInt(C))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + m_AShr(m_V, m_ConstantInt(C))), + m_Value(A))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known @@ -649,10 +859,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownOne |= RHSKnownOne << C->getZExtValue(); // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( - m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C)))), + m_LShr(m_V, m_ConstantInt(C)), + m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted @@ -661,8 +871,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, KnownOne |= RHSKnownZero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SGE && 
isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -672,8 +881,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -683,8 +891,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -694,8 +901,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -705,8 +911,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); @@ -715,14 +920,13 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(I, Q, DL)) { + Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). - if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I))) + if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL)) KnownZero |= APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); else @@ -743,13 +947,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, /// this won't lose us code quality. /// /// This function is defined on values with integer type, values with pointer -/// type (but only if TD is non-null), and vectors of integers. In the case +/// type, and vectors of integers. In the case /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. 
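The (KnownZero, KnownOne) pair is the contract threaded through all of these routines: a set bit in KnownZero is proven zero, a set bit in KnownOne is proven one, the two masks never overlap, and a bit clear in both is unknown. A small standalone illustration of what one such pair admits:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t KnownZero = 0xF0, KnownOne = 0x01;   // value has the form 0000???1
  assert((KnownZero & KnownOne) == 0);         // the masks must never conflict
  for (unsigned V = 0; V < 256; ++V)
    if ((V & KnownZero) == 0 && (V & KnownOne) == KnownOne)
      assert(V <= 0x0F && (V & 1) == 1);       // only odd values below 16 fit
}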
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q) { + const DataLayout &DL, unsigned Depth, const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -757,8 +960,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, assert((V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarType()->isPointerTy()) && "Not integer or pointer type!"); - assert((!TD || - TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && @@ -797,7 +999,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // The address of an aligned GlobalValue has trailing zeros. if (auto *GO = dyn_cast<GlobalObject>(V)) { unsigned Align = GO->getAlignment(); - if (Align == 0 && TD) { + if (Align == 0) { if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { Type *ObjectType = GVar->getType()->getElementType(); if (ObjectType->isSized()) { @@ -805,9 +1007,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) - Align = TD->getPreferredAlignment(GVar); + Align = DL.getPreferredAlignment(GVar); else - Align = TD->getABITypeAlignment(ObjectType); + Align = DL.getABITypeAlignment(ObjectType); } } } @@ -823,11 +1025,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Argument *A = dyn_cast<Argument>(V)) { unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - if (!Align && TD && A->hasStructRetAttr()) { + if (!Align && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. Type *EltTy = cast<PointerType>(A->getType())->getElementType(); if (EltTy->isSized()) - Align = TD->getABITypeAlignment(EltTy); + Align = DL.getABITypeAlignment(EltTy); } if (Align) @@ -838,7 +1040,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Don't give up yet... there might be an assumption that provides more // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Or a dominating condition for that matter + if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, + Depth, Q); return; } @@ -854,12 +1061,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // the bits of its aliasee. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->mayBeOverridden()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q); + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); return; } // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Check whether there's a dominating condition which implies something about + // this value at the given context. 
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, + Q); Operator *I = dyn_cast<Operator>(V); if (!I) return; @@ -873,8 +1086,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -883,8 +1096,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -893,8 +1106,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -905,21 +1118,20 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -929,8 +1141,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Select: - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); // Only known if known in both the LHS and RHS. 
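The Select case keeps only facts that hold in both arms, since either arm may be taken at run time. A standalone sketch of that intersection, with one known-zero bit shared by both arms:

#include <cassert>

int main() {
  for (unsigned T = 0; T < 256; T += 2)     // true arm: bit 0 known zero
    for (unsigned F = 0; F < 256; F += 2)   // false arm: bit 0 known zero
      for (int Cond = 0; Cond < 2; ++Cond) {
        unsigned R = Cond ? T : F;
        assert((R & 1) == 0);               // the shared bit stays known
      }
}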
KnownOne &= KnownOne2; @@ -946,8 +1158,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::AddrSpaceCast: // Pointers could be different sizes. - // We can't handle these if we don't know the pointer size. - if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { @@ -956,17 +1166,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - if(TD) { - SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); - } else { - SrcBitWidth = SrcTy->getScalarSizeInBits(); - if (!SrcBitWidth) break; - } + SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -980,7 +1185,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); break; } break; @@ -991,7 +1196,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1007,7 +1212,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 @@ -1020,7 +1225,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. @@ -1034,7 +1239,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. 
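In the AShr case just below, the vacated high bits inherit whatever is known about the sign bit; with the sign bit known zero, the shifted-in bits are known zero as well. A standalone i8 check:

#include <cassert>
#include <cstdint>

int main() {
  for (int V = 0; V <= 127; ++V) {          // i8 value, sign bit known zero
    int8_t R = int8_t(int8_t(V) >> 2);      // arithmetic shift right by 2
    assert((uint8_t(R) & 0xC0) == 0);       // both vacated bits known zero
  }
}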
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -1048,15 +1253,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } case Instruction::SRem: @@ -1064,8 +1269,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, + Q); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -1089,8 +1294,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL, + Depth + 1, Q); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -1102,8 +1307,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, + Q); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -1112,8 +1317,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -1125,8 +1330,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Alloca: { AllocaInst *AI = cast<AllocaInst>(V); unsigned Align = AI->getAlignment(); - if (Align == 0 && TD) - Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + if (Align == 0) + Align = DL.getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); @@ -1136,8 +1341,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. 
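The getelementptr analysis that follows tracks trailing zero bits through the address arithmetic, keeping the minimum of what the base pointer and each scaled index guarantee. A standalone check for an 8-byte-aligned base with a 4-byte element stride:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t Base = 0; Base < 1024; Base += 8)   // base: 3 trailing zeros
    for (uint64_t Idx = 0; Idx < 64; ++Idx) {
      uint64_t Addr = Base + Idx * 4;               // 4-byte element stride
      assert((Addr & 3) == 0);   // min(3, 2) = 2 trailing zero bits survive
    }
}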
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1, Q); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL, + Depth + 1, Q); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -1145,10 +1350,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) { - TrailZ = 0; - break; - } // Handle case when index is vector zeroinitializer Constant *CIndex = cast<Constant>(Index); @@ -1159,7 +1360,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Index = CIndex->getSplatValue(); unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); TrailZ = std::min<unsigned>(TrailZ, countTrailingZeros(Offset)); @@ -1171,9 +1372,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); - uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1, + Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -1215,11 +1417,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q); + computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q); + computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -1242,16 +1444,16 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = APInt::getAllOnesValue(BitWidth); KnownOne = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + for (Value *IncValue : P->incoming_values()) { // Skip direct self references. - if (P->getIncomingValue(i) == P) continue; + if (IncValue == P) continue; KnownZero2 = APInt(BitWidth, 0); KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1, Q); + computeKnownBits(IncValue, KnownZero2, KnownOne2, DL, + MaxDepth - 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -1303,19 +1505,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); + KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth, Q); + computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, + KnownZero, KnownOne, KnownZero2, KnownOne2, DL, + Depth, Q); break; } } @@ -1328,9 +1530,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, /// Determine whether the sign bit is known to be zero or one. /// Convenience wrapper around computeKnownBits. void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth, - const Query &Q) { - unsigned BitWidth = getBitWidth(V->getType(), TD); + const DataLayout &DL, unsigned Depth, const Query &Q) { + unsigned BitWidth = getBitWidth(V->getType(), DL); if (!BitWidth) { KnownZero = false; KnownOne = false; @@ -1338,7 +1539,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q); + computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -1348,7 +1549,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q) { + const Query &Q, const DataLayout &DL) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; @@ -1375,20 +1576,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) - return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL); if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL); if (SelectInst *SI = dyn_cast<SelectInst>(V)) - return - isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && - isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. 
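That claim is easy to confirm exhaustively at a small width, since the AND can only keep or clear the single set bit. A standalone check:

#include <cassert>

static bool isPowerOfTwoOrZero(unsigned V) { return (V & (V - 1)) == 0; }

int main() {
  for (unsigned P = 1; P < 256; P <<= 1)      // the 8-bit powers of two
    for (unsigned X = 0; X < 256; ++X)
      assert(isPowerOfTwoOrZero(P & X));      // the result is P or 0
}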
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) || - isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -1403,19 +1603,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) - if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL)) return true; if (match(Y, m_And(m_Specific(X), m_Value())) || match(Y, m_And(m_Value(), m_Specific(X)))) - if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL)) return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q); + computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1433,7 +1633,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, - Depth, Q); + Depth, Q, DL); } return false; @@ -1445,7 +1645,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, /// to be non-null. /// /// Currently this routine does not support vector GEPs. -static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, +static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, unsigned Depth, const Query &Q) { if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) return false; @@ -1458,10 +1658,6 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q)) return true; - // Past this, if we don't have DataLayout, we can't do much. - if (!DL) - return false; - // Walk the GEP operands and see if any operand introduces a non-zero offset. // If so, then the GEP cannot produce a null pointer, as doing so would // inherently violate the inbounds contract within address space zero. @@ -1471,7 +1667,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, if (StructType *STy = dyn_cast<StructType>(*GTI)) { ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = DL->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); uint64_t ElementOffset = SL->getElementOffset(ElementIdx); if (ElementOffset > 0) return true; @@ -1479,7 +1675,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, } // If we have a zero-sized type, the index doesn't matter. Keep looping. 
- if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0) + if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0) continue; // Fast path the constant operand case both for efficiency and so we don't @@ -1528,7 +1724,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, /// For vectors return true if every element is known to be non-zero when /// defined. Supports values with integer or pointer type and vectors of /// integers. -bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, +bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, const Query &Q) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) @@ -1561,21 +1757,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, if (isKnownNonNull(V)) return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - if (isGEPKnownNonNull(GEP, TD, Depth, Q)) + if (isGEPKnownNonNull(GEP, DL, Depth, Q)) return true; } - unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL); // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) - return isKnownNonZero(X, TD, Depth, Q) || - isKnownNonZero(Y, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q); // ext X != 0 if X != 0. if (isa<SExtInst>(V) || isa<ZExtInst>(V)) - return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q); + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q); // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. @@ -1583,11 +1778,11 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // shl nuw can't remove any non-zero bits. OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); if (BO->hasNoUnsignedWrap()) - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); if (KnownOne[0]) return true; } @@ -1597,29 +1792,28 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // shr exact can only shift out zero bits. PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); if (BO->isExact()) - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { - return isKnownNonZero(X, TD, Depth, Q); + return isKnownNonZero(X, DL, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. 
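
// Aside: a standalone sketch (plain C++) of the shift rule isKnownNonZero
// uses above: "shl X, Y != 0 if X is odd", because the low set bit of an odd
// value just moves up and survives any in-range shift amount.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 1; x < 256; x += 2)  // every odd i8 value
    for (unsigned s = 0; s < 8; ++s)     // every in-range shift amount
      assert(static_cast<uint8_t>(x << s) != 0);
  return 0;
}
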
if (XKnownNonNegative && YKnownNonNegative) - if (isKnownNonZero(X, TD, Depth, Q) || - isKnownNonZero(Y, TD, Depth, Q)) + if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -1630,22 +1824,22 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. if (XKnownNonNegative && - isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q)) + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL)) return true; if (YKnownNonNegative && - isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q)) + isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL)) return true; } // X * Y. @@ -1654,21 +1848,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && - isKnownNonZero(X, TD, Depth, Q) && - isKnownNonZero(Y, TD, Depth, Q)) + isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) && - isKnownNonZero(SI->getFalseValue(), TD, Depth, Q)) + if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) && + isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); return KnownOne != 0; } @@ -1677,15 +1870,14 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, /// cannot have. /// /// This function is defined on values with integer type, values with pointer -/// type (but only if TD is non-null), and vectors of integers. In the case +/// type, and vectors of integers. In the case /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -bool MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth, - const Query &Q) { +bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, + unsigned Depth, const Query &Q) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); return (KnownZero & Mask) == Mask; } @@ -1699,14 +1891,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, /// /// 'Op' must have a scalar integer type. 
 ///
-unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
-                            unsigned Depth, const Query &Q) {
-  assert((TD || V->getType()->isIntOrIntVectorTy()) &&
-         "ComputeNumSignBits requires a DataLayout object to operate "
-         "on non-integer values!");
-  Type *Ty = V->getType();
-  unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
-                         Ty->getScalarSizeInBits();
+unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth,
+                            const Query &Q) {
+  unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType());
   unsigned Tmp, Tmp2;
   unsigned FirstAnswer = 1;
@@ -1721,10 +1908,63 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
   default: break;
   case Instruction::SExt:
     Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
-    return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
+    return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp;
+
+  case Instruction::SDiv: {
+    const APInt *Denominator;
+    // sdiv X, C -> adds log(C) sign bits.
+    if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+      // Ignore non-positive denominator.
+      if (!Denominator->isStrictlyPositive())
+        break;
+
+      // Calculate the incoming numerator bits.
+      unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+      // Add floor(log(C)) bits to the numerator bits.
+      return std::min(TyBits, NumBits + Denominator->logBase2());
+    }
+    break;
+  }
+
+  case Instruction::SRem: {
+    const APInt *Denominator;
+    // srem X, C -> we know that the result is within [-C+1,C) when C is a
+    // positive constant. This lets us put a lower bound on the number of sign
+    // bits.
+    if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+      // Ignore non-positive denominator.
+      if (!Denominator->isStrictlyPositive())
+        break;
+
+      // Calculate the incoming numerator bits. SRem by a positive constant
+      // can't lower the number of sign bits.
+      unsigned NumrBits =
+          ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+      // Calculate the leading sign bit constraints by examining the
+      // denominator. Given that the denominator is positive, there are two
+      // cases:
+      //
+      //  1. the numerator is positive. The result range is [0,C) and [0,C) u<
+      //     (1 << ceilLogBase2(C)).
+      //
+      //  2. the numerator is negative. Then the result range is (-C,0] and
+      //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
+      //
+      // Thus a lower bound on the number of sign bits is `TyBits -
+      // ceilLogBase2(C)`.
+
+      unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+      return std::max(NumrBits, ResBits);
+    }
+    break;
+  }
   case Instruction::AShr: {
-    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+    Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
     // ashr X, C -> adds C sign bits. Vectors too.
     const APInt *ShAmt;
     if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1737,7 +1977,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
     const APInt *ShAmt;
     if (match(U->getOperand(1), m_APInt(ShAmt))) {
       // shl destroys sign bits.
-      Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+      Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
       Tmp2 = ShAmt->getZExtValue();
       if (Tmp2 >= TyBits || // Bad shift.
           Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1749,9 +1989,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
   case Instruction::Or:
   case Instruction::Xor: // NOT is handled here.
     // Logical binary ops preserve the number of sign bits at the worst.
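
// Aside: a standalone brute-force sketch (plain C++; signBits8/floorLog2 are
// invented helpers, not LLVM APIs) confirming the new SDiv rule above for i8:
// dividing by a positive constant C adds at least floor(log2(C)) sign bits.
// The SRem lower bound can be spot-checked the same way.
#include <cassert>
#include <cstdint>

static int signBits8(int8_t v) { // leading bits that copy the sign bit
  int n = 1;
  for (int i = 6; i >= 0 && ((v >> i) & 1) == ((v >> 7) & 1); --i)
    ++n;
  return n;
}

static int floorLog2(int c) {
  int l = 0;
  while (c >>= 1)
    ++l;
  return l;
}

int main() {
  for (int c : {1, 2, 3, 5, 8, 16, 100}) // strictly positive denominators
    for (int x = -128; x <= 127; ++x) {
      int q = x / c; // C++ '/' truncates toward zero, matching sdiv
      int lower = signBits8(static_cast<int8_t>(x)) + floorLog2(c);
      assert(signBits8(static_cast<int8_t>(q)) >= (lower > 8 ? 8 : lower));
    }
  return 0;
}
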
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -1760,22 +2000,23 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, break; case Instruction::Select: - Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q); return std::min(Tmp, Tmp2); case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, + Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1788,19 +2029,20 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, return Tmp; } - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); if (Tmp2 == 1) return 1; // Handle NEG. if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, + Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1816,7 +2058,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; @@ -1830,12 +2072,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. 
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q); + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q); for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; - Tmp = std::min(Tmp, - ComputeNumSignBits(PN->getIncomingValue(i), TD, - Depth+1, Q)); + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q)); } return Tmp; } @@ -1850,7 +2091,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -2000,8 +2241,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); + // FIXME: Magic number! At the least, this should be given a name because it's + // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to + // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == 6) - return 1; // Limit search depth. + return false; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); if (!I) return false; @@ -2044,6 +2288,62 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return false; } +bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); + + // FIXME: Magic number! At the least, this should be given a name because it's + // used similarly in CannotBeNegativeZero(). A better fix may be to + // expose it as a parameter, so it can be used for testing / experimenting. + if (Depth == 6) + return false; // Limit search depth. + + const Operator *I = dyn_cast<Operator>(V); + if (!I) return false; + + switch (I->getOpcode()) { + default: break; + case Instruction::FMul: + // x*x is always non-negative or a NaN. + if (I->getOperand(0) == I->getOperand(1)) + return true; + // Fall through + case Instruction::FAdd: + case Instruction::FDiv: + case Instruction::FRem: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && + CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + case Instruction::FPExt: + case Instruction::FPTrunc: + // Widening/narrowing never change sign. + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::fabs: + case Intrinsic::sqrt: + return true; + case Intrinsic::powi: + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + // powi(x,n) is non-negative if n is even. + if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) + return true; + } + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); + case Intrinsic::fma: + case Intrinsic::fmuladd: + // x*x+y is non-negative if y is non-negative. + return I->getOperand(0) == I->getOperand(1) && + CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1); + } + break; + } + return false; +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. 
This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2068,26 +2368,14 @@ Value *llvm::isBytewiseValue(Value *V) {
     // Don't handle long double formats, which have strange constraints.
   }
 
-  // We can handle constant integers that are power of two in size and a
-  // multiple of 8 bits.
+  // We can handle constant integers that are a multiple of 8 bits.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    unsigned Width = CI->getBitWidth();
-    if (isPowerOf2_32(Width) && Width > 8) {
-      // We can handle this value if the recursive binary decomposition is the
-      // same at all levels.
-      APInt Val = CI->getValue();
-      APInt Val2;
-      while (Val.getBitWidth() != 8) {
-        unsigned NextWidth = Val.getBitWidth()/2;
-        Val2 = Val.lshr(NextWidth);
-        Val2 = Val2.trunc(Val.getBitWidth()/2);
-        Val = Val.trunc(Val.getBitWidth()/2);
-
-        // If the top/bottom halves aren't the same, reject it.
-        if (Val != Val2)
-          return nullptr;
-      }
-      return ConstantInt::get(V->getContext(), Val);
+    if (CI->getBitWidth() % 8 == 0) {
+      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+      if (!CI->getValue().isSplat(8))
+        return nullptr;
+      return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
     }
   }
 
@@ -2286,23 +2574,19 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
 /// Analyze the specified pointer to see if it can be expressed as a base
 /// pointer plus a constant offset. Return the base and offset to the caller.
 Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                              const DataLayout *DL) {
-  // Without DataLayout, conservatively assume 64-bit offsets, which is
-  // the widest we support.
-  unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
+                                              const DataLayout &DL) {
+  unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
   APInt ByteOffset(BitWidth, 0);
   while (1) {
     if (Ptr->getType()->isVectorTy())
       break;
 
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
-      if (DL) {
-        APInt GEPOffset(BitWidth, 0);
-        if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
-          break;
+      APInt GEPOffset(BitWidth, 0);
+      if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+        break;
 
-        ByteOffset += GEPOffset;
-      }
+      ByteOffset += GEPOffset;
 
       Ptr = GEP->getPointerOperand();
     } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -2331,7 +2615,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
   // Look through bitcast instructions and geps.
   V = V->stripPointerCasts();
 
-  // If the value is a GEP instructionor constant expression, treat it as an
+  // If the value is a GEP instruction or constant expression, treat it as an
   // offset.
   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
     // Make sure the GEP has exactly three arguments.
@@ -2358,7 +2642,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
       StartIdx = CI->getZExtValue();
     else
       return false;
-    return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
+    return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
+                                 TrimAtNul);
   }
 
   // The GEP instruction, constant or instruction, must reference a global
@@ -2421,8 +2706,8 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
   // If it was new, see if all the input strings are the same length.
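
// Aside: a standalone sketch (plain C++, uint32_t standing in for APInt) of
// the isSplat(8) test that now drives isBytewiseValue above: a constant can
// be materialized by repeating a single byte exactly when every byte of its
// representation equals the lowest one.
#include <cassert>
#include <cstdint>

static bool isByteSplat32(uint32_t v) {
  return v == (v & 0xffu) * 0x01010101u; // replicate the low byte four times
}

int main() {
  assert(isByteSplat32(0x00000000u));  // i32 0
  assert(isByteSplat32(0xffffffffu));  // i32 -1
  assert(isByteSplat32(0xababababu));  // one byte, repeated
  assert(!isByteSplat32(0x01020304u)); // bytes differ: not bytewise
  return 0;
}
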
uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + for (Value *IncValue : PN->incoming_values()) { + uint64_t Len = GetStringLengthH(IncValue, PHIs); if (Len == 0) return 0; // Unknown length -> unknown. if (Len == ~0ULL) continue; @@ -2468,8 +2753,34 @@ uint64_t llvm::GetStringLength(Value *V) { return Len == ~0ULL ? 1 : Len; } -Value * -llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { +/// \brief \p PN defines a loop-variant pointer to an object. Check if the +/// previous iteration of the loop was referring to the same object as \p PN. +static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) { + // Find the loop-defined value. + Loop *L = LI->getLoopFor(PN->getParent()); + if (PN->getNumIncomingValues() != 2) + return true; + + // Find the value from previous iteration. + auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + return true; + + // If a new pointer is loaded in the loop, the pointer references a different + // object in every iteration. E.g.: + // for (i) + // int *p = a[i]; + // ... + if (auto *Load = dyn_cast<LoadInst>(PrevValue)) + if (!L->isLoopInvariant(Load->getPointerOperand())) + return false; + return true; +} + +Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, + unsigned MaxLookup) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -2486,7 +2797,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. - if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { + if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) { V = Simplified; continue; } @@ -2498,17 +2809,15 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { return V; } -void -llvm::GetUnderlyingObjects(Value *V, - SmallVectorImpl<Value *> &Objects, - const DataLayout *TD, - unsigned MaxLookup) { +void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects, + const DataLayout &DL, LoopInfo *LI, + unsigned MaxLookup) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist; Worklist.push_back(V); do { Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObject(P, TD, MaxLookup); + P = GetUnderlyingObject(P, DL, MaxLookup); if (!Visited.insert(P).second) continue; @@ -2520,8 +2829,20 @@ llvm::GetUnderlyingObjects(Value *V, } if (PHINode *PN = dyn_cast<PHINode>(P)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); + // If this PHI changes the underlying object in every iteration of the + // loop, don't look through it. Consider: + // int **A; + // for (i) { + // Prev = Curr; // Prev = PHI (Prev_0, Curr) + // Curr = A[i]; + // *Prev, *Curr; + // + // Prev is tracking Curr one iteration behind so they refer to different + // underlying objects. 
+ if (!LI || !LI->isLoopHeader(PN->getParent()) || + isSameUnderlyingObjectInLoop(PN, LI)) + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); continue; } @@ -2542,8 +2863,188 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } +static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset, + Type *Ty, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + assert(Offset.isNonNegative() && "offset can't be negative"); + assert(Ty->isSized() && "must be sized"); + + APInt DerefBytes(Offset.getBitWidth(), 0); + bool CheckForNonNull = false; + if (const Argument *A = dyn_cast<Argument>(BV)) { + DerefBytes = A->getDereferenceableBytes(); + if (!DerefBytes.getBoolValue()) { + DerefBytes = A->getDereferenceableOrNullBytes(); + CheckForNonNull = true; + } + } else if (auto CS = ImmutableCallSite(BV)) { + DerefBytes = CS.getDereferenceableBytes(0); + if (!DerefBytes.getBoolValue()) { + DerefBytes = CS.getDereferenceableOrNullBytes(0); + CheckForNonNull = true; + } + } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) { + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (!DerefBytes.getBoolValue()) { + if (MDNode *MD = + LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CheckForNonNull = true; + } + } + + if (DerefBytes.getBoolValue()) + if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) + if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI)) + return true; + + return false; +} + +static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + Type *VTy = V->getType(); + Type *Ty = VTy->getPointerElementType(); + if (!Ty->isSized()) + return false; + + APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); + return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); +} + +/// Return true if Value is always a dereferenceable pointer. +/// +/// Test if V is always a pointer to allocated and suitably aligned memory for +/// a simple load or store. +static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI, + SmallPtrSetImpl<const Value *> &Visited) { + // Note that it is not safe to speculate into a malloc'd region because + // malloc may return null. + + // These are obviously ok. + if (isa<AllocaInst>(V)) return true; + + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. 
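
// Aside: a minimal standalone sketch (plain C++) of the bitcast rule stated
// above: looking through a pointer cast is safe only when the source type is
// at least as large and at least as aligned as the destination type. Host
// sizeof/alignof stand in for DataLayout's store sizes and ABI alignments.
#include <cstdint>

template <typename Src, typename Dst> constexpr bool castPreservesDeref() {
  return sizeof(Src) >= sizeof(Dst) && alignof(Src) >= alignof(Dst);
}

static_assert(castPreservesDeref<int32_t, int8_t>(),
              "i32* -> i8* only narrows the access, so it stays safe");
static_assert(!castPreservesDeref<int8_t, int32_t>(),
              "i8* -> i32* would widen a 1-byte object, so it is rejected");

int main() { return 0; }
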
+ if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if (STy->isSized() && DTy->isSized() && + (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && + (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, + DT, TLI, Visited); + } + + // Global variables which can't collapse to null are ok. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return !GV->hasExternalWeakLinkage(); + + // byval arguments are okay. + if (const Argument *A = dyn_cast<Argument>(V)) + if (A->hasByValAttr()) + return true; + + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) + return true; + + // For GEPs, determine if the indexing lands within the allocated object. + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + // Conservatively require that the base pointer be fully dereferenceable. + if (!Visited.insert(GEP->getOperand(0)).second) + return false; + if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI, + DT, TLI, Visited)) + return false; + // Check the indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::const_op_iterator I = GEP->op_begin()+1, + E = GEP->op_end(); I != E; ++I) { + Value *Index = *I; + Type *Ty = *GTI++; + // Struct indices can't be out of bounds. + if (isa<StructType>(Ty)) + continue; + ConstantInt *CI = dyn_cast<ConstantInt>(Index); + if (!CI) + return false; + // Zero is always ok. + if (CI->isZero()) + continue; + // Check to see that it's within the bounds of an array. + ArrayType *ATy = dyn_cast<ArrayType>(Ty); + if (!ATy) + return false; + if (CI->getValue().getActiveBits() > 64) + return false; + if (CI->getZExtValue() >= ATy->getNumElements()) + return false; + } + // Indices check out; this is dereferenceable. + return true; + } + + // For gc.relocate, look through relocations + if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V)) + if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { + GCRelocateOperands RelocateInst(I); + return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, + DT, TLI, Visited); + } + + if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) + return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, + DT, TLI, Visited); + + // If we don't know, assume the worst. + return false; +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + // When dereferenceability information is provided by a dereferenceable + // attribute, we know exactly how many bytes are dereferenceable. If we can + // determine the exact offset to the attributed variable, we can use that + // information here. 
+  Type *VTy = V->getType();
+  Type *Ty = VTy->getPointerElementType();
+  if (Ty->isSized()) {
+    APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+    const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+    if (Offset.isNonNegative())
+      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
+                                         CtxI, DT, TLI))
+        return true;
+  }
+
+  SmallPtrSet<const Value *, 32> Visited;
+  return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+}
+
 bool llvm::isSafeToSpeculativelyExecute(const Value *V,
-                                        const DataLayout *TD) {
+                                        const Instruction *CtxI,
+                                        const DominatorTree *DT,
+                                        const TargetLibraryInfo *TLI) {
   const Operator *Inst = dyn_cast<Operator>(V);
   if (!Inst)
     return false;
@@ -2567,20 +3068,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   case Instruction::SDiv:
   case Instruction::SRem: {
     // x / y is undefined if y == 0 or x == INT_MIN and y == -1
-    const APInt *X, *Y;
-    if (match(Inst->getOperand(1), m_APInt(Y))) {
-      if (*Y != 0) {
-        if (*Y == -1) {
-          // The numerator can't be MinSignedValue if the denominator is -1.
-          if (match(Inst->getOperand(0), m_APInt(X)))
-            return !Y->isMinSignedValue();
-          // The numerator *might* be MinSignedValue.
-          return false;
-        }
-        // The denominator is not 0 or -1, it's safe to proceed.
-        return true;
-      }
-    }
+    const APInt *Numerator, *Denominator;
+    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+      return false;
+    // We cannot hoist this division if the denominator is 0.
+    if (*Denominator == 0)
+      return false;
+    // It's safe to hoist if the denominator is not 0 or -1.
+    if (*Denominator != -1)
+      return true;
+    // At this point we know that the denominator is -1. It is safe to hoist
+    // as long as we know that the numerator is not INT_MIN.
+    if (match(Inst->getOperand(0), m_APInt(Numerator)))
+      return !Numerator->isMinSignedValue();
+    // The numerator *might* be MinSignedValue.
    return false;
   }
   case Instruction::Load: {
@@ -2589,7 +3090,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
     // Speculative load may create a race that did not exist in the source.
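
// Aside: a standalone sketch (plain C++) restating the constant-denominator
// cases of the SDiv/SRem logic above for i8: zero never divides, -1 traps
// only for the INT_MIN numerator, and every remaining case produces a
// quotient that fits back into i8.
#include <cassert>
#include <cstdint>

int main() {
  for (int d = -128; d <= 127; ++d) {
    if (d == 0)
      continue; // division by zero: never safe to speculate
    for (int n = -128; n <= 127; ++n) {
      if (d == -1 && n == -128)
        continue; // i8 INT_MIN / -1 overflows: the one other unsafe case
      int q = n / d; // safe in every remaining case
      assert(q >= -128 && q <= 127);
    }
  }
  return 0;
}
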
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(TD); + const DataLayout &DL = LI->getModule()->getDataLayout(); + return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -2669,7 +3171,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; @@ -2680,8 +3182,62 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { return false; } +static bool isKnownNonNullFromDominatingCondition(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT) { + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + const ICmpInst *Cmp = dyn_cast<ICmpInst>(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + const BranchInst *BI = dyn_cast<BranchInst>(CmpU); + if (!BI) + continue; + + assert(BI->isConditional() && "uses a comparison!"); + + BasicBlock *NonNullSuccessor = nullptr; + CmpInst::Predicate Pred; + + if (match(const_cast<ICmpInst*>(Cmp), + m_c_ICmp(Pred, m_Specific(V), m_Zero()))) { + if (Pred == ICmpInst::ICMP_EQ) + NonNullSuccessor = BI->getSuccessor(1); + else if (Pred == ICmpInst::ICMP_NE) + NonNullSuccessor = BI->getSuccessor(0); + } + + if (NonNullSuccessor) { + BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); + if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) + return true; + } + } + } + + return false; +} + +bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, + const DominatorTree *DT, const TargetLibraryInfo *TLI) { + if (isKnownNonNull(V, TLI)) + return true; + + return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false; +} + OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -2731,7 +3287,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, } OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, - const DataLayout *DL, + const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -2758,3 +3314,133 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } + +static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + LHS = CmpLHS; + RHS = CmpRHS; + + // (icmp X, Y) ? X : Y + if (TrueVal == CmpLHS && FalseVal == CmpRHS) { + switch (Pred) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMAX; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMAX; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMIN; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMIN; + } + } + + // (icmp X, Y) ? 
Y : X
+  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+    switch (Pred) {
+    default: return SPF_UNKNOWN; // Equality.
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE: return SPF_UMIN;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE: return SPF_SMIN;
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE: return SPF_UMAX;
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE: return SPF_SMAX;
+    }
+  }
+
+  if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+    if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+        (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+      // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+      // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+      if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+        return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+      }
+
+      // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+      // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+      if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+        return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+      }
+    }
+
+    // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
+    if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
+      if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() &&
+          (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
+           match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
+        LHS = TrueVal;
+        RHS = FalseVal;
+        return SPF_SMIN;
+      }
+    }
+  }
+
+  // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+  return SPF_UNKNOWN;
+}
+
+static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
+                                 Instruction::CastOps *CastOp) {
+  CastInst *CI = dyn_cast<CastInst>(V1);
+  Constant *C = dyn_cast<Constant>(V2);
+  if (!CI || !C)
+    return nullptr;
+  *CastOp = CI->getOpcode();
+
+  if (isa<SExtInst>(CI) && CmpI->isSigned()) {
+    Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
+    // This is only valid if the truncated value can be sign-extended
+    // back to the original value.
+    if (ConstantExpr::getSExt(T, C->getType()) == C)
+      return T;
+    return nullptr;
+  }
+  if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
+    return ConstantExpr::getTrunc(C, CI->getSrcTy());
+
+  if (isa<TruncInst>(CI))
+    return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+
+  return nullptr;
+}
+
+SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+                                             Value *&LHS, Value *&RHS,
+                                             Instruction::CastOps *CastOp) {
+  SelectInst *SI = dyn_cast<SelectInst>(V);
+  if (!SI) return SPF_UNKNOWN;
+
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
+  if (!CmpI) return SPF_UNKNOWN;
+
+  ICmpInst::Predicate Pred = CmpI->getPredicate();
+  Value *CmpLHS = CmpI->getOperand(0);
+  Value *CmpRHS = CmpI->getOperand(1);
+  Value *TrueVal = SI->getTrueValue();
+  Value *FalseVal = SI->getFalseValue();
+
+  // Bail out early.
+  if (CmpI->isEquality())
+    return SPF_UNKNOWN;
+
+  // Deal with type mismatches.
+  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
+    if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+                                  cast<CastInst>(TrueVal)->getOperand(0), C,
+                                  LHS, RHS);
+    if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+                                  C, cast<CastInst>(FalseVal)->getOperand(0),
+                                  LHS, RHS);
+  }
+  return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+                              LHS, RHS);
+}
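
// Aside: a standalone sketch (plain C++) of the pattern tables in
// matchSelectPattern above: "(icmp slt X, Y) ? X : Y" selects the signed
// minimum, swapping the arms gives the maximum, and "(X >s -1) ? X : -X"
// is the ABS pattern.
#include <algorithm>
#include <cassert>
#include <cstdlib>

int main() {
  for (int x = -8; x <= 8; ++x)
    for (int y = -8; y <= 8; ++y) {
      assert(((x < y) ? x : y) == std::min(x, y)); // SPF_SMIN
      assert(((x < y) ? y : x) == std::max(x, y)); // SPF_SMAX
      assert(((x > -1) ? x : -x) == std::abs(x));  // SPF_ABS
    }
  return 0;
}
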