author | dim <dim@FreeBSD.org> | 2011-06-12 15:42:51 +0000
committer | dim <dim@FreeBSD.org> | 2011-06-12 15:42:51 +0000
commit | ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree | b3032e51d630e8070e9e08d6641648f195316a80 /lib/Analysis
parent | 2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
download | FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.zip, FreeBSD-src-ece02cd5829cea836e9365b0845a8ef042d17b0a.tar.gz
Vendor import of llvm trunk r132879:
http://llvm.org/svn/llvm-project/llvm/trunk@132879
Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/Analysis.cpp | 1
-rw-r--r-- | lib/Analysis/BasicAliasAnalysis.cpp | 143
-rw-r--r-- | lib/Analysis/BranchProbabilityInfo.cpp | 357
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 1
-rw-r--r-- | lib/Analysis/ConstantFolding.cpp | 8
-rw-r--r-- | lib/Analysis/DIBuilder.cpp | 8
-rw-r--r-- | lib/Analysis/IPA/CallGraph.cpp | 2
-rw-r--r-- | lib/Analysis/IPA/CallGraphSCCPass.cpp | 4
-rw-r--r-- | lib/Analysis/IPA/FindUsedTypes.cpp | 4
-rw-r--r-- | lib/Analysis/IVUsers.cpp | 33
-rw-r--r-- | lib/Analysis/InlineCost.cpp | 19
-rw-r--r-- | lib/Analysis/InstructionSimplify.cpp | 250
-rw-r--r-- | lib/Analysis/LazyValueInfo.cpp | 10
-rw-r--r-- | lib/Analysis/Loads.cpp | 4
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp | 90
-rw-r--r-- | lib/Analysis/RegionPass.cpp | 2
-rw-r--r-- | lib/Analysis/ScalarEvolution.cpp | 137
-rw-r--r-- | lib/Analysis/ValueTracking.cpp | 16
18 files changed, 920 insertions, 169 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 6ebe100..e57ba78 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
+  initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
   initializeCFGOnlyViewerPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f1bb8a3..8330ea7 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -281,17 +281,20 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       continue;
     }

-    if (const Instruction *I = dyn_cast<Instruction>(V))
-      // TODO: Get a DominatorTree and use it here.
-      if (const Value *Simplified =
-            SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
-        V = Simplified;
-        continue;
-      }
-
     const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
-    if (GEPOp == 0)
+    if (GEPOp == 0) {
+      // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+      // can come up with something. This matches what GetUnderlyingObject does.
+      if (const Instruction *I = dyn_cast<Instruction>(V))
+        // TODO: Get a DominatorTree and use it here.
+        if (const Value *Simplified =
+              SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+          V = Simplified;
+          continue;
+        }
+
       return V;
+    }

     // Don't attempt to analyze GEPs over unsized objects.
     if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
@@ -448,7 +451,13 @@ namespace {
   /// BasicAliasAnalysis - This is the primary alias analysis implementation.
   struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
     static char ID; // Class identification, replacement for typeinfo
-    BasicAliasAnalysis() : ImmutablePass(ID) {
+    BasicAliasAnalysis() : ImmutablePass(ID),
+                           // AliasCache rarely has more than 1 or 2 elements,
+                           // so start it off fairly small so that clear()
+                           // doesn't have to tromp through 64 (the default)
+                           // elements on each alias query. This really wants
+                           // something like a SmallDenseMap.
+                           AliasCache(8) {
       initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
@@ -462,12 +471,12 @@ namespace {
     virtual AliasResult alias(const Location &LocA,
                               const Location &LocB) {
-      assert(Visited.empty() && "Visited must be cleared after use!");
+      assert(AliasCache.empty() && "AliasCache must be cleared after use!");
       assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
              "BasicAliasAnalysis doesn't support interprocedural queries.");
       AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
                                      LocB.Ptr, LocB.Size, LocB.TBAATag);
-      Visited.clear();
+      AliasCache.clear();
       return Alias;
     }
@@ -503,7 +512,12 @@ namespace {
     }

   private:
-    // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP().
+    // AliasCache - Track alias queries to guard against recursion.
+    typedef std::pair<Location, Location> LocPair;
+    typedef DenseMap<LocPair, AliasResult> AliasCacheTy;
+    AliasCacheTy AliasCache;
+
+    // Visited - Track instructions visited by pointsToConstantMemory.
     SmallPtrSet<const Value*, 16> Visited;

     // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
@@ -680,9 +694,12 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   unsigned ArgNo = 0;
   for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
        CI != CE; ++CI, ++ArgNo) {
-    // Only look at the no-capture pointer arguments.
+    // Only look at the no-capture or byval pointer arguments. If this
+    // pointer were passed to arguments that were neither of these, then it
+    // couldn't be no-capture.
     if (!(*CI)->getType()->isPointerTy() ||
-        !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
+        (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
+         !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
       continue;

     // If this is a no-capture pointer argument, see if we can tell that it
@@ -816,13 +833,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
                              const MDNode *V2TBAAInfo,
                              const Value *UnderlyingV1,
                              const Value *UnderlyingV2) {
-  // If this GEP has been visited before, we're on a use-def cycle.
-  // Such cycles are only valid when PHI nodes are involved or in unreachable
-  // code. The visitPHI function catches cycles containing PHIs, but there
-  // could still be a cycle without PHIs in unreachable code.
-  if (!Visited.insert(GEP1))
-    return MayAlias;
-
   int64_t GEP1BaseOffset;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;

@@ -940,7 +950,30 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     return NoAlias;
   }

-  return MayAlias;
+  // Statically, we can see that the base objects are the same, but the
+  // pointers have dynamic offsets which we can't resolve. And none of our
+  // little tricks above worked.
+  //
+  // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+  // practical effect of this is protecting TBAA in the case of dynamic
+  // indices into arrays of unions. An alternative way to solve this would
+  // be to have clang emit extra metadata for unions and/or union accesses.
+  // A union-specific solution wouldn't handle the problem for malloc'd
+  // memory however.
+  return PartialAlias;
+}
+
+static AliasAnalysis::AliasResult
+MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+  // If the results agree, take it.
+  if (A == B)
+    return A;
+  // A mix of PartialAlias and MustAlias is PartialAlias.
+  if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
+      (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
+    return AliasAnalysis::PartialAlias;
+  // Otherwise, we don't know anything.
+  return AliasAnalysis::MayAlias;
 }

 /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
@@ -950,13 +983,6 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
                                 const MDNode *SITBAAInfo,
                                 const Value *V2, uint64_t V2Size,
                                 const MDNode *V2TBAAInfo) {
-  // If this select has been visited before, we're on a use-def cycle.
-  // Such cycles are only valid when PHI nodes are involved or in unreachable
-  // code. The visitPHI function catches cycles containing PHIs, but there
-  // could still be a cycle without PHIs in unreachable code.
-  if (!Visited.insert(SI))
-    return MayAlias;
-
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -969,9 +995,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
       AliasResult ThisAlias =
         aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
                    SI2->getFalseValue(), V2Size, V2TBAAInfo);
-      if (ThisAlias != Alias)
-        return MayAlias;
-      return Alias;
+      return MergeAliasResults(ThisAlias, Alias);
     }

   // If both arms of the Select node NoAlias or MustAlias V2, then returns
@@ -981,16 +1005,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
   if (Alias == MayAlias)
     return MayAlias;

-  // If V2 is visited, the recursive case will have been caught in the
-  // above aliasCheck call, so these subsequent calls to aliasCheck
-  // don't need to assume that V2 is being visited recursively.
-  Visited.erase(V2);
-
   AliasResult ThisAlias =
     aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
-  if (ThisAlias != Alias)
-    return MayAlias;
-  return Alias;
+  return MergeAliasResults(ThisAlias, Alias);
 }

 // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
@@ -1000,10 +1017,6 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
                              const MDNode *PNTBAAInfo,
                              const Value *V2, uint64_t V2Size,
                              const MDNode *V2TBAAInfo) {
-  // The PHI node has already been visited, avoid recursion any further.
-  if (!Visited.insert(PN))
-    return MayAlias;
-
   // If the values are PHIs in the same block, we can do a more precise
   // as well as efficient check: just check for aliases between the values
   // on corresponding edges.
@@ -1020,8 +1033,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
         aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
                    PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
                    V2Size, V2TBAAInfo);
-      if (ThisAlias != Alias)
-        return MayAlias;
+      Alias = MergeAliasResults(ThisAlias, Alias);
+      if (Alias == MayAlias)
+        break;
     }
     return Alias;
   }
@@ -1052,15 +1066,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
     Value *V = V1Srcs[i];

-    // If V2 is visited, the recursive case will have been caught in the
-    // above aliasCheck call, so these subsequent calls to aliasCheck
-    // don't need to assume that V2 is being visited recursively.
-    Visited.erase(V2);
-
     AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, V, PNSize, PNTBAAInfo);
-    if (ThisAlias != Alias || ThisAlias == MayAlias)
-      return MayAlias;
+    Alias = MergeAliasResults(ThisAlias, Alias);
+    if (Alias == MayAlias)
+      break;
   }

   return Alias;
@@ -1145,6 +1155,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
       (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
     return NoAlias;

+  // Check the cache before climbing up use-def chains. This also terminates
+  // otherwise infinitely recursive queries.
+  LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
+               Location(V2, V2Size, V2TBAAInfo));
+  if (V1 > V2)
+    std::swap(Locs.first, Locs.second);
+  std::pair<AliasCacheTy::iterator, bool> Pair =
+    AliasCache.insert(std::make_pair(Locs, MayAlias));
+  if (!Pair.second)
+    return Pair.first->second;
+
   // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
   // GEP can't simplify, we don't even look at the PHI cases.
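The MergeAliasResults helper added above encodes a small lattice join: identical results pass through, PartialAlias absorbs MustAlias, and any other mix degrades to MayAlias. A minimal standalone sketch of that rule (the enum mirrors AliasAnalysis::AliasResult; the self-test harness is illustrative, not LLVM code):

#include <cassert>

// Illustrative copy of the four-value alias result used above.
enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

// Join two per-arm results (e.g. the two arms of a select, or the
// incoming values of a PHI) into one conservative answer.
static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
  if (A == B)
    return A;                      // Agreement passes through unchanged.
  if ((A == PartialAlias && B == MustAlias) ||
      (B == PartialAlias && A == MustAlias))
    return PartialAlias;           // MustAlias is a special case of overlap.
  return MayAlias;                 // Any other mix loses all information.
}

int main() {
  assert(MergeAliasResults(MustAlias, MustAlias) == MustAlias);
  assert(MergeAliasResults(PartialAlias, MustAlias) == PartialAlias);
  assert(MergeAliasResults(NoAlias, MustAlias) == MayAlias);
  return 0;
}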
   if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
@@ -1154,7 +1175,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
     AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
-    if (Result != MayAlias) return Result;
+    if (Result != MayAlias) return AliasCache[Locs] = Result;
   }

   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1164,7 +1185,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
     AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
                                   V2, V2Size, V2TBAAInfo);
-    if (Result != MayAlias) return Result;
+    if (Result != MayAlias) return AliasCache[Locs] = Result;
   }

   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1174,7 +1195,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
     AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
                                      V2, V2Size, V2TBAAInfo);
-    if (Result != MayAlias) return Result;
+    if (Result != MayAlias) return AliasCache[Locs] = Result;
   }

   // If both pointers are pointing into the same object and one of them
@@ -1183,8 +1204,10 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (TD && O1 == O2)
     if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
         (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
-      return PartialAlias;
+      return AliasCache[Locs] = PartialAlias;

-  return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
-                              Location(V2, V2Size, V2TBAAInfo));
+  AliasResult Result =
+    AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+                         Location(V2, V2Size, V2TBAAInfo));
+  return AliasCache[Locs] = Result;
 }
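The cache insertion in aliasCheck doubles as the recursion guard that Visited used to provide: the query pair is canonicalized by pointer order so (A,B) and (B,A) share one entry, and the entry is seeded with MayAlias so a query that recurses into itself terminates with the conservative answer. A simplified sketch of that pattern using std::map and opaque keys (names and the placeholder analysis are illustrative, not the LLVM API):

#include <algorithm>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

using Key = std::pair<const void *, const void *>;
static std::map<Key, AliasResult> AliasCache;

AliasResult aliasCheck(const void *V1, const void *V2) {
  // Canonicalize the pair so symmetric queries hit the same entry.
  Key K(V1, V2);
  if (K.first > K.second)
    std::swap(K.first, K.second);

  // Seed the entry with MayAlias; if the same query recurses into
  // itself (e.g. through a PHI cycle), it sees the seed and stops.
  auto Ins = AliasCache.insert({K, MayAlias});
  if (!Ins.second)
    return Ins.first->second;      // Cached, or an in-progress query.

  AliasResult Result = MayAlias;   // ...the real analysis would go here...
  return AliasCache[K] = Result;   // Overwrite the seed with the answer.
}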
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 0000000..812fac0
--- /dev/null
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,357 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+                      "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+                    "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfo::ID = 0;
+
+// Please note that BranchProbabilityAnalysis is not a FunctionPass.
+// It is created by BranchProbabilityInfo (which is a FunctionPass), which
+// provides a clear interface. Thanks to that, all heuristics and other
+// private methods are hidden in the .cpp file.
+class BranchProbabilityAnalysis {
+
+  typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+
+  DenseMap<Edge, uint32_t> *Weights;
+
+  BranchProbabilityInfo *BP;
+
+  LoopInfo *LI;
+
+  // Weights are for internal use only. They are used by heuristics to help to
+  // estimate edges' probability. Example:
+  //
+  // Using "Loop Branch Heuristics" we predict weights of edges for the
+  // block BB2.
+  //
+  //        ...
+  //         |
+  //         V
+  //  BB1<-+
+  //   |   |
+  //   |   | (Weight = 128)
+  //   V   |
+  //  BB2--+
+  //   |
+  //   | (Weight = 4)
+  //   V
+  //  BB3
+  //
+  // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
+  // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+
+  static const uint32_t LBH_TAKEN_WEIGHT = 128;
+  static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+  // Standard weight value. Used when none of the heuristics set weight for
+  // the edge.
+  static const uint32_t NORMAL_WEIGHT = 16;
+
+  // Minimum weight of an edge. Please note that weight is NEVER 0.
+  static const uint32_t MIN_WEIGHT = 1;
+
+  // Return TRUE if BB leads directly to a Return Instruction.
+  static bool isReturningBlock(BasicBlock *BB) {
+    SmallPtrSet<BasicBlock *, 8> Visited;
+
+    while (true) {
+      TerminatorInst *TI = BB->getTerminator();
+      if (isa<ReturnInst>(TI))
+        return true;
+
+      if (TI->getNumSuccessors() > 1)
+        break;
+
+      // It is an unreachable block which we can consider as a return instruction.
+      if (TI->getNumSuccessors() == 0)
+        return true;
+
+      Visited.insert(BB);
+      BB = TI->getSuccessor(0);
+
+      // Stop if cycle is detected.
+      if (Visited.count(BB))
+        return false;
+    }
+
+    return false;
+  }
+
+  // Multiply Edge Weight by two.
+  void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+    uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+    uint32_t MaxWeight = getMaxWeightFor(Src);
+
+    if (Weight * 2 > MaxWeight)
+      BP->setEdgeWeight(Src, Dst, MaxWeight);
+    else
+      BP->setEdgeWeight(Src, Dst, Weight * 2);
+  }
+
+  // Divide Edge Weight by two.
+  void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+    uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+
+    assert(Weight > 0);
+    if (Weight / 2 < MIN_WEIGHT)
+      BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
+    else
+      BP->setEdgeWeight(Src, Dst, Weight / 2);
+  }
+
+  uint32_t getMaxWeightFor(BasicBlock *BB) const {
+    return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+  }
+
+public:
+  BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
+                            BranchProbabilityInfo *BP, LoopInfo *LI)
+    : Weights(W), BP(BP), LI(LI) {
+  }
+
+  // Return Heuristics
+  void calcReturnHeuristics(BasicBlock *BB);
+
+  // Pointer Heuristics
+  void calcPointerHeuristics(BasicBlock *BB);
+
+  // Loop Branch Heuristics
+  void calcLoopBranchHeuristics(BasicBlock *BB);
+
+  bool runOnFunction(Function &F);
+};
+
+// Calculate Edge Weights using "Return Heuristics". Predict that a successor
+// which leads directly to a Return Instruction will not be taken.
+void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+  if (BB->getTerminator()->getNumSuccessors() == 1)
+    return;
+
+  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    BasicBlock *Succ = *I;
+    if (isReturningBlock(Succ)) {
+      decEdgeWeight(BB, Succ);
+    }
+  }
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison
+// between two pointers, or a pointer and NULL, will fail.
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+  BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!BI || !BI->isConditional())
+    return;
+
+  Value *Cond = BI->getCondition();
+  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+  if (!CI)
+    return;
+
+  Value *LHS = CI->getOperand(0);
+
+  if (!LHS->getType()->isPointerTy())
+    return;
+
+  assert(CI->getOperand(1)->getType()->isPointerTy());
+
+  BasicBlock *Taken = BI->getSuccessor(0);
+  BasicBlock *NonTaken = BI->getSuccessor(1);
+
+  // p != 0   -> isProb = true
+  // p == 0   -> isProb = false
+  // p != q   -> isProb = true
+  // p == q   -> isProb = false;
+  bool isProb = !CI->isEquality();
+  if (!isProb)
+    std::swap(Taken, NonTaken);
+
+  incEdgeWeight(BB, Taken);
+  decEdgeWeight(BB, NonTaken);
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
+void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+  uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
+
+  Loop *L = LI->getLoopFor(BB);
+  if (!L)
+    return;
+
+  SmallVector<BasicBlock *, 8> BackEdges;
+  SmallVector<BasicBlock *, 8> ExitingEdges;
+
+  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    BasicBlock *Succ = *I;
+    Loop *SuccL = LI->getLoopFor(Succ);
+    if (SuccL != L)
+      ExitingEdges.push_back(Succ);
+    else if (Succ == L->getHeader())
+      BackEdges.push_back(Succ);
+  }
+
+  if (uint32_t numBackEdges = BackEdges.size()) {
+    uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
+    if (backWeight < NORMAL_WEIGHT)
+      backWeight = NORMAL_WEIGHT;
+
+    for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+         EE = BackEdges.end(); EI != EE; ++EI) {
+      BasicBlock *Back = *EI;
+      BP->setEdgeWeight(BB, Back, backWeight);
+    }
+  }
+
+  uint32_t numExitingEdges = ExitingEdges.size();
+  if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
+    uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+    if (exitWeight < MIN_WEIGHT)
+      exitWeight = MIN_WEIGHT;
+
+    for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+         EE = ExitingEdges.end(); EI != EE; ++EI) {
+      BasicBlock *Exiting = *EI;
+      BP->setEdgeWeight(BB, Exiting, exitWeight);
+    }
+  }
+}
+
+bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    BasicBlock *BB = I++;
+
+    // Only LBH uses setEdgeWeight method.
+    calcLoopBranchHeuristics(BB);
+
+    // PH and RH use only incEdgeWeight and decEdgeWeight methods to
+    // not efface LBH results.
+    calcPointerHeuristics(BB);
+    calcReturnHeuristics(BB);
+  }
+
+  return false;
+}
+
+bool BranchProbabilityInfo::runOnFunction(Function &F) {
+  LoopInfo &LI = getAnalysis<LoopInfo>();
+  BranchProbabilityAnalysis BPA(&Weights, this, &LI);
+  return BPA.runOnFunction(F);
+}
+
+uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+  uint32_t Sum = 0;
+
+  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    BasicBlock *Succ = *I;
+    uint32_t Weight = getEdgeWeight(BB, Succ);
+    uint32_t PrevSum = Sum;
+
+    Sum += Weight;
+    assert(Sum > PrevSum); (void) PrevSum;
+  }
+
+  return Sum;
+}
+
+bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+  // Hot probability is at least 4/5 = 80%
+  uint32_t Weight = getEdgeWeight(Src, Dst);
+  uint32_t Sum = getSumForBlock(Src);
+
+  // FIXME: Implement BranchProbability::compare then change this code to
+  // compare this BranchProbability against a static "hot" BranchProbability.
+  return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+  uint32_t Sum = 0;
+  uint32_t MaxWeight = 0;
+  BasicBlock *MaxSucc = 0;
+
+  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    BasicBlock *Succ = *I;
+    uint32_t Weight = getEdgeWeight(BB, Succ);
+    uint32_t PrevSum = Sum;
+
+    Sum += Weight;
+    assert(Sum > PrevSum); (void) PrevSum;
+
+    if (Weight > MaxWeight) {
+      MaxWeight = Weight;
+      MaxSucc = Succ;
+    }
+  }
+
+  // FIXME: Use BranchProbability::compare.
+  if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+    return MaxSucc;
+
+  return 0;
+}
+
+// Return edge's weight. If it can't be found, return the DEFAULT_WEIGHT value.
+uint32_t
+BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+  Edge E(Src, Dst);
+  DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
+
+  if (I != Weights.end())
+    return I->second;
+
+  return DEFAULT_WEIGHT;
+}
+
+void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
+                                          uint32_t Weight) {
+  Weights[std::make_pair(Src, Dst)] = Weight;
+  DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
+               << Dst->getNameStr() << " weight to " << Weight
+               << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
+}
+
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+
+  uint32_t N = getEdgeWeight(Src, Dst);
+  uint32_t D = getSumForBlock(Src);
+
+  return BranchProbability(N, D);
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
+                                            BasicBlock *Dst) const {
+  BranchProbability Prob = getEdgeProbability(Src, Dst);
+
+  OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+     << " probability is " << Prob
+     << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+  return OS;
+}
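The new pass never stores probabilities directly; it stores per-edge weights and derives a probability as weight / sum-of-successor-weights, as the worked example in the file's comment shows. A small standalone check of that arithmetic, reusing the constants from the comment (128 and 4) and the integer-only hotness test from isEdgeHot (illustrative, not LLVM code):

#include <cstdint>
#include <cstdio>

// Recompute the worked example from the comment above: with
// LBH_TAKEN_WEIGHT = 128 on the backedge and LBH_NONTAKEN_WEIGHT = 4
// on the exit, each edge probability is its weight over the sum.
int main() {
  const uint32_t BackedgeWeight = 128, ExitWeight = 4;
  const uint32_t Sum = BackedgeWeight + ExitWeight;

  std::printf("P(BB2->BB1) = %u/%u = %f\n", (unsigned)BackedgeWeight,
              (unsigned)Sum, (double)BackedgeWeight / Sum);   // ~0.9696
  std::printf("P(BB2->BB3) = %u/%u = %f\n", (unsigned)ExitWeight,
              (unsigned)Sum, (double)ExitWeight / Sum);       // ~0.0303

  // isEdgeHot() uses the same ratio without floating point: an edge is
  // hot when Weight * 5 > Sum * 4, i.e. its probability exceeds 4/5.
  bool Hot = (uint64_t)BackedgeWeight * 5 > (uint64_t)Sum * 4;
  std::printf("backedge hot: %d\n", Hot);                     // prints 1
  return 0;
}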
" [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6be5617..1a975bf 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp + BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp ConstantFolding.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5de2b04..08a6065 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1085,7 +1085,7 @@ llvm::canConstantFoldCallTo(const Function *F) { case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': - return Name == "exp"; + return Name == "exp" || Name == "exp2"; case 'f': return Name == "fabs" || Name == "fmod" || Name == "floor"; case 'l': @@ -1221,6 +1221,12 @@ llvm::ConstantFoldCall(Function *F, case 'e': if (Name == "exp") return ConstantFoldFP(exp, V, Ty); + + if (Name == "exp2") { + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. + return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + } break; case 'f': if (Name == "fabs") diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index dc98c9e..ef5d03a 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -51,6 +51,10 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + // Create a named metadata so that it is easier to find cu in a module. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(TheCU); } /// createFile - Create a file descriptor to hold debugging information @@ -156,12 +160,12 @@ DIType DIBuilder::createReferenceType(DIType RTy) { /// createTypedef - Create debugging information entry for a typedef. DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo) { + unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. 
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index dc98c9e..ef5d03a 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -51,6 +51,10 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
   };
   TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+  // Create a named metadata so that it is easier to find cu in a module.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+  NMD->addOperand(TheCU);
 }

 /// createFile - Create a file descriptor to hold debugging information
@@ -156,12 +160,12 @@ DIType DIBuilder::createReferenceType(DIType RTy) {

 /// createTypedef - Create debugging information entry for a typedef.
 DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
-                                unsigned LineNo) {
+                                unsigned LineNo, DIDescriptor Context) {
   // typedefs are encoded in DIDerivedType format.
   assert(Ty.Verify() && "Invalid typedef type!");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
-    Ty.getContext(),
+    Context,
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 690c4b4..2e79eab 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -148,7 +148,7 @@ private:
       for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
            II != IE; ++II) {
         CallSite CS(cast<Value>(II));
-        if (CS && !isa<DbgInfoIntrinsic>(II)) {
+        if (CS && !isa<IntrinsicInst>(II)) {
           const Function *Callee = CS.getCalledFunction();
           if (Callee)
             Node->addCalledFunction(CS, getOrInsertFunction(Callee));
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 725ab72..659ffab 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-        CallSite CS(cast<Value>(I));
-        if (!CS || isa<DbgInfoIntrinsic>(I)) continue;
+        CallSite CS(cast<Value>(I));
+        if (!CS || isa<IntrinsicInst>(I)) continue;

         // If this call site already existed in the callgraph, just verify it
         // matches up to expectations and remove it from CallSites.
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 06ae34c..dde2556 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -32,7 +32,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types",
 void FindUsedTypes::IncorporateType(const Type *Ty) {
   // If ty doesn't already exist in the used types map, add it now, otherwise
   // return.
-  if (!UsedTypes.insert(Ty).second) return;  // Already contain Ty.
+  if (!UsedTypes.insert(Ty)) return;  // Already contain Ty.

   // Make sure to add any types this type references now.
   //
@@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) {
 //
 void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
   OS << "Types in use by this module:\n";
-  for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+  for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
        E = UsedTypes.end(); I != E; ++I) {
     OS << "  ";
     WriteTypeSymbolic(OS, *I, M);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 2cda791..a0c42f0 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
@@ -38,6 +39,15 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(IVUsers, "iv-users",
                     "Induction Variable Users", false, true)

+// IVUsers behavior currently depends on this temporary indvars mode. The
+// option must be defined upstream from its uses.
+namespace llvm {
+  bool DisableIVRewrite = false;
+}
+cl::opt<bool, true> DisableIVRewriteOpt(
+  "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
+  cl::desc("Disable canonical induction variable rewriting"));
+
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }
@@ -79,7 +89,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   if (!SE->isSCEVable(I->getType()))
     return false;   // Void and FP expressions cannot be reduced.

@@ -90,6 +100,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;

+  // We expect Sign/Zero extension to be eliminated from the IR before analyzing
+  // any downstream uses.
+  if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
+    return false;
+
   if (!Processed.insert(I))
     return true;    // Instruction already handled.

@@ -121,13 +136,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User)) {
+          !AddUsersIfInteresting(User, Phi)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
     } else if (Processed.count(User) ||
-               !AddUsersIfInteresting(User)) {
+               !AddUsersIfInteresting(User, Phi)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -135,9 +150,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {

     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I));
+      IVUses.push_back(new IVStrideUse(this, User, I, Phi));
       IVStrideUse &NewUse = IVUses.back();
-      // Transform the expression into a normalized form.
+      // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+      // The regular return value here is discarded; instead of recording
+      // it, we just recompute it when we need it.
       ISE = TransformForPostIncUse(NormalizeAutodetect,
                                    ISE, User, I,
                                    NewUse.PostIncLoops,
@@ -148,8 +165,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   return true;
 }

-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
-  IVUses.push_back(new IVStrideUse(this, User, Operand));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
+  IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
   return IVUses.back();
 }

@@ -177,7 +194,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I);
+    (void)AddUsersIfInteresting(I, cast<PHINode>(I));

   return false;
 }
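The -disable-iv-rewrite flag above uses cl::opt's external-storage form: the bool lives in namespace llvm so any file can declare it extern and test it, while the cl::opt object only parses the command line into that location. The pattern in isolation, following the definition shown in the diff (assumes the in-tree LLVM CommandLine headers):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Storage lives in an ordinary global so another translation unit can
// simply declare `extern bool DisableIVRewrite;` and read the value
// without ever seeing the cl::opt machinery.
namespace llvm {
  bool DisableIVRewrite = false;
}

// The <bool, true> form means "external storage": the parser writes
// through cl::location into the global above instead of owning the value.
static cl::opt<bool, true> DisableIVRewriteOpt(
  "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
  cl::desc("Disable canonical induction variable rewriting"));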
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a820ecf..efde598 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -66,21 +66,13 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {

       ImmutableCallSite CS(cast<Instruction>(II));

-      // If this function contains a call to setjmp or _setjmp, never inline
-      // it.  This is a hack because we depend on the user marking their local
-      // variables as volatile if they are live across a setjmp call, and they
-      // probably won't do this in callers.
       if (const Function *F = CS.getCalledFunction()) {
         // If a function is both internal and has a single use, then it is
         // extremely likely to get inlined in the future (it was probably
         // exposed by an interleaved devirtualization pass).
         if (F->hasInternalLinkage() && F->hasOneUse())
           ++NumInlineCandidates;
-
-        if (F->isDeclaration() &&
-            (F->getName() == "setjmp" || F->getName() == "_setjmp"))
-          callsSetJmp = true;
-
+
         // If this call is to function itself, then the function is recursive.
         // Inlining it into other functions is a bad idea, because this is
         // basically just a form of loop peeling, and our metrics aren't useful
@@ -226,6 +218,13 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void CodeMetrics::analyzeFunction(Function *F) {
+  // If this function contains a call to setjmp or _setjmp, never inline
+  // it.  This is a hack because we depend on the user marking their local
+  // variables as volatile if they are live across a setjmp call, and they
+  // probably won't do this in callers.
+  if (F->callsFunctionThatReturnsTwice())
+    callsSetJmp = true;
+
   // Look at the size of the callee.
   for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
     analyzeBasicBlock(&*BB);
@@ -594,7 +593,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
   CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;

   // For small functions we prefer to recalculate the cost for better accuracy.
-  if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) {
+  if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
     resetCachedCostInfo(Caller);
     return;
   }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d6d339..9d78f8b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -913,8 +913,6 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
     }
   }

-  bool isSigned = Opcode == Instruction::SRem;
-
   // X % undef -> undef
   if (match(Op1, m_Undef()))
     return Op1;
@@ -1378,6 +1376,26 @@ static const Type *GetCompareTy(Value *Op) {
   return CmpInst::makeCmpResultType(Op->getType());
 }

+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS".  Return such a value if found,
+/// otherwise return null.  Helper function for analyzing max/min idioms.
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+                                         Value *LHS, Value *RHS) {
+  SelectInst *SI = dyn_cast<SelectInst>(V);
+  if (!SI)
+    return 0;
+  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+  if (!Cmp)
+    return 0;
+  Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+  if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+    return Cmp;
+  if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+      LHS == CmpRHS && RHS == CmpLHS)
+    return Cmp;
+  return 0;
+}
+
 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -1460,46 +1478,48 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     default: assert(false && "Unknown ICmp predicate!");
     case ICmpInst::ICMP_ULT:
-      return ConstantInt::getFalse(LHS->getContext());
+      // getNullValue also works for vectors, unlike getFalse.
+      return Constant::getNullValue(ITy);
     case ICmpInst::ICMP_UGE:
-      return ConstantInt::getTrue(LHS->getContext());
+      // getAllOnesValue also works for vectors, unlike getTrue.
+      return ConstantInt::getAllOnesValue(ITy);
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
       if (isKnownNonZero(LHS, TD))
-        return ConstantInt::getFalse(LHS->getContext());
+        return Constant::getNullValue(ITy);
       break;
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_UGT:
       if (isKnownNonZero(LHS, TD))
-        return ConstantInt::getTrue(LHS->getContext());
+        return ConstantInt::getAllOnesValue(ITy);
       break;
     case ICmpInst::ICMP_SLT:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getTrue(LHS->getContext());
+        return ConstantInt::getAllOnesValue(ITy);
       if (LHSKnownNonNegative)
-        return ConstantInt::getFalse(LHS->getContext());
+        return Constant::getNullValue(ITy);
       break;
     case ICmpInst::ICMP_SLE:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getTrue(LHS->getContext());
+        return ConstantInt::getAllOnesValue(ITy);
       if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
-        return ConstantInt::getFalse(LHS->getContext());
+        return Constant::getNullValue(ITy);
       break;
     case ICmpInst::ICMP_SGE:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getFalse(LHS->getContext());
+        return Constant::getNullValue(ITy);
       if (LHSKnownNonNegative)
-        return ConstantInt::getTrue(LHS->getContext());
+        return ConstantInt::getAllOnesValue(ITy);
       break;
     case ICmpInst::ICMP_SGT:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getFalse(LHS->getContext());
+        return Constant::getNullValue(ITy);
       if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
-        return ConstantInt::getTrue(LHS->getContext());
+        return ConstantInt::getAllOnesValue(ITy);
       break;
     }
   }
@@ -1791,7 +1811,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       case ICmpInst::ICMP_EQ:
       case ICmpInst::ICMP_UGT:
       case ICmpInst::ICMP_UGE:
-        return ConstantInt::getFalse(RHS->getContext());
+        // getNullValue also works for vectors, unlike getFalse.
+        return Constant::getNullValue(ITy);
       case ICmpInst::ICMP_SLT:
       case ICmpInst::ICMP_SLE:
         ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
@@ -1801,7 +1822,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_ULE:
-        return ConstantInt::getTrue(RHS->getContext());
+        // getAllOnesValue also works for vectors, unlike getTrue.
+        return Constant::getAllOnesValue(ITy);
      }
    }
    if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
@@ -1818,7 +1840,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       case ICmpInst::ICMP_NE:
       case ICmpInst::ICMP_UGT:
       case ICmpInst::ICMP_UGE:
-        return ConstantInt::getTrue(RHS->getContext());
+        // getAllOnesValue also works for vectors, unlike getTrue.
+        return Constant::getAllOnesValue(ITy);
       case ICmpInst::ICMP_SLT:
       case ICmpInst::ICMP_SLE:
         ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
@@ -1828,7 +1851,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       case ICmpInst::ICMP_EQ:
       case ICmpInst::ICMP_ULT:
       case ICmpInst::ICMP_ULE:
-        return ConstantInt::getFalse(RHS->getContext());
+        // getNullValue also works for vectors, unlike getFalse.
+        return Constant::getNullValue(ITy);
       }
     }
@@ -1843,7 +1867,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       // fall-through
     case Instruction::SDiv:
     case Instruction::AShr:
-      if (!LBO->isExact() && !RBO->isExact())
+      if (!LBO->isExact() || !RBO->isExact())
         break;
       if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
                                       RBO->getOperand(0), TD, DT, MaxRecurse-1))
@@ -1864,6 +1888,194 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }

+  // Simplify comparisons involving max/min.
+  Value *A, *B;
+  CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+  CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+  // Signed variants on "max(a,b)>=a -> true".
+  if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+    EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+    // We analyze this as smax(A, B) pred A.
+    P = Pred;
+  } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+    EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+    // We analyze this as smax(A, B) swapped-pred A.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+             (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+    EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+    // We analyze this as smax(-A, -B) swapped-pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+    EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+    // We analyze this as smax(-A, -B) pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = Pred;
+  }
+  if (P != CmpInst::BAD_ICMP_PREDICATE) {
+    // Cases correspond to "max(A, B) p A".
+    switch (P) {
+    default:
+      break;
+    case CmpInst::ICMP_EQ:
+    case CmpInst::ICMP_SLE:
+      // Equivalent to "A EqP B".  This may be the same as the condition tested
+      // in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+        return V;
+      // Otherwise, see if "A EqP B" simplifies.
+      if (MaxRecurse)
+        if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+          return V;
+      break;
+    case CmpInst::ICMP_NE:
+    case CmpInst::ICMP_SGT: {
+      CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+      // Equivalent to "A InvEqP B".  This may be the same as the condition
+      // tested in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+        return V;
+      // Otherwise, see if "A InvEqP B" simplifies.
+      if (MaxRecurse)
+        if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+          return V;
+      break;
+    }
+    case CmpInst::ICMP_SGE:
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    case CmpInst::ICMP_SLT:
+      // Always false.
+      return Constant::getNullValue(ITy);
+    }
+  }
+
+  // Unsigned variants on "max(a,b)>=a -> true".
+  P = CmpInst::BAD_ICMP_PREDICATE;
+  if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+    EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+    // We analyze this as umax(A, B) pred A.
+    P = Pred;
+  } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+    EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+    // We analyze this as umax(A, B) swapped-pred A.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+             (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+    EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+    // We analyze this as umax(-A, -B) swapped-pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+    EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+    // We analyze this as umax(-A, -B) pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = Pred;
+  }
+  if (P != CmpInst::BAD_ICMP_PREDICATE) {
+    // Cases correspond to "max(A, B) p A".
+    switch (P) {
+    default:
+      break;
+    case CmpInst::ICMP_EQ:
+    case CmpInst::ICMP_ULE:
+      // Equivalent to "A EqP B".  This may be the same as the condition tested
+      // in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+        return V;
+      // Otherwise, see if "A EqP B" simplifies.
+      if (MaxRecurse)
+        if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+          return V;
+      break;
+    case CmpInst::ICMP_NE:
+    case CmpInst::ICMP_UGT: {
+      CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+      // Equivalent to "A InvEqP B".  This may be the same as the condition
+      // tested in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+        return V;
+      // Otherwise, see if "A InvEqP B" simplifies.
+      if (MaxRecurse)
+        if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+          return V;
+      break;
+    }
+    case CmpInst::ICMP_UGE:
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    case CmpInst::ICMP_ULT:
+      // Always false.
+      return Constant::getNullValue(ITy);
+    }
+  }
+
+  // Variants on "max(x,y) >= min(x,z)".
+  Value *C, *D;
+  if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+      match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+      (A == C || A == D || B == C || B == D)) {
+    // max(x, ?) pred min(x, ?).
+    if (Pred == CmpInst::ICMP_SGE)
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    if (Pred == CmpInst::ICMP_SLT)
+      // Always false.
+      return Constant::getNullValue(ITy);
+  } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+             match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+             (A == C || A == D || B == C || B == D)) {
+    // min(x, ?) pred max(x, ?).
+    if (Pred == CmpInst::ICMP_SLE)
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    if (Pred == CmpInst::ICMP_SGT)
+      // Always false.
+      return Constant::getNullValue(ITy);
+  } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+             match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+             (A == C || A == D || B == C || B == D)) {
+    // max(x, ?) pred min(x, ?).
+    if (Pred == CmpInst::ICMP_UGE)
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    if (Pred == CmpInst::ICMP_ULT)
+      // Always false.
+      return Constant::getNullValue(ITy);
+  } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+             match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+             (A == C || A == D || B == C || B == D)) {
+    // min(x, ?) pred max(x, ?).
+    if (Pred == CmpInst::ICMP_ULE)
+      // Always true.
+      return Constant::getAllOnesValue(ITy);
+    if (Pred == CmpInst::ICMP_UGT)
+      // Always false.
+      return Constant::getNullValue(ITy);
+  }
+
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
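The max/min logic above rests on simple order-theoretic facts: smax(a,b) >= a is always true, smax(a,b) < a is always false, and the equality-flavored comparisons reduce to the very condition a cmp+select max idiom already computed. A small standalone check of those facts on plain ints (illustrative, not the pass itself):

#include <algorithm>
#include <cassert>

// The folds above rely on three facts about smax, checked exhaustively
// here over a small range:
//   smax(A, B) >= A  is always true  (ICMP_SGE case),
//   smax(A, B) <  A  is always false (ICMP_SLT case), and
//   smax(A, B) <= A  is equivalent to "A >= B", which is exactly what
//   ExtractEquivalentCondition recovers from the select's own compare.
int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      int Max = std::max(A, B);
      assert(Max >= A);                 // Always true.
      assert(!(Max < A));               // Always false.
      assert((Max <= A) == (A >= B));   // Reduces to the select condition.
    }
  return 0;
}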
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index d5f0b5c..6e27597 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -589,16 +589,18 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
   }
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
     if (MI->isVolatile()) return false;
-    if (MI->getAddressSpace() != 0) return false;

     // FIXME: check whether it has a valuerange that excludes zero?
     ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
     if (!Len || Len->isZero()) return false;

-    if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
-      return true;
+    if (MI->getDestAddressSpace() == 0)
+      if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+        return true;
     if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
-      return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+      if (MTI->getSourceAddressSpace() == 0)
+        if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr)
+          return true;
   }
   return false;
 }
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index ab34fd6..c5c676b 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
 static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
   // Test if the values are trivially equivalent.
   if (A == B) return true;
-
+
   // Test if the values come from identical arithmetic instructions.
   // Use isIdenticalToWhenDefined instead of isIdenticalTo because
   // this function is only used when one address use dominates the
@@ -42,7 +42,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
     if (const Instruction *BI = dyn_cast<Instruction>(B))
       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
-
+
   // Otherwise they may not be equivalent.
   return false;
 }
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index ce7fab6..5f640c0 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -374,10 +374,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       if (R == AliasAnalysis::MustAlias)
         return MemDepResult::getDef(Inst);

+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+      //        in terms of clobbering loads, but since it does this by looking
+      //        at the clobbering load directly, it doesn't know about any
+      //        phi translation that may have happened along the way.
+
       // If we have a partial alias, then return this as a clobber for the
       // client to handle.
       if (R == AliasAnalysis::PartialAlias)
         return MemDepResult::getClobber(Inst);
+#endif

       // Random may-alias loads don't depend on each other without a
       // dependence.
@@ -497,7 +503,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
     // If we can do a pointer scan, make it happen.
     bool isLoad = !(MR & AliasAnalysis::Mod);
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
-      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

     LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
                                           QueryParent);
@@ -937,6 +943,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);

+  // PredList used inside loop.
+  SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
   // Keep track of the entries that we know are sorted.  Previously cached
   // entries will all be sorted.  The entries we add we only sort on demand (we
   // don't insert every element into its sorted position).  We know that we
@@ -973,22 +982,29 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // the same Pointer.
     if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
       SkipFirstBlock = false;
+      SmallVector<BasicBlock*, 16> NewBlocks;
       for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
         // Verify that we haven't looked at this block yet.
         std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
           InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
         if (InsertRes.second) {
           // First time we've looked at *PI.
-          Worklist.push_back(*PI);
+          NewBlocks.push_back(*PI);
           continue;
         }

         // If we have seen this block before, but it was with a different
         // pointer then we have a phi translation failure and we have to treat
         // this as a clobber.
-        if (InsertRes.first->second != Pointer.getAddr())
+        if (InsertRes.first->second != Pointer.getAddr()) {
+          // Make sure to clean up the Visited map before continuing on to
+          // PredTranslationFailure.
+          for (unsigned i = 0; i < NewBlocks.size(); i++)
+            Visited.erase(NewBlocks[i]);
           goto PredTranslationFailure;
+        }
       }
+      Worklist.append(NewBlocks.begin(), NewBlocks.end());
       continue;
     }
@@ -1007,13 +1023,15 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       NumSortedEntries = Cache->size();
     }
     Cache = 0;
-
+
+    PredList.clear();
     for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
       BasicBlock *Pred = *PI;
-
+      PredList.push_back(std::make_pair(Pred, Pointer));
+
       // Get the PHI translated pointer in this predecessor.  This can fail if
       // not translatable, in which case the getAddr() returns null.
-      PHITransAddr PredPointer(Pointer);
+      PHITransAddr &PredPointer = PredList.back().second;
       PredPointer.PHITranslateValue(BB, Pred, 0);

       Value *PredPtrVal = PredPointer.getAddr();
@@ -1027,6 +1045,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));

       if (!InsertRes.second) {
+        // We found the pred; take it off the list of preds to visit.
+        PredList.pop_back();
+
         // If the predecessor was visited with PredPtr, then we already did
         // the analysis and can ignore it.
         if (InsertRes.first->second == PredPtrVal)
@@ -1035,14 +1056,47 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         // Otherwise, the block was previously analyzed with a different
         // pointer.  We can't represent the result of this case, so we just
         // treat this as a phi translation failure.
+
+        // Make sure to clean up the Visited map before continuing on to
+        // PredTranslationFailure.
+        for (unsigned i = 0; i < PredList.size(); i++)
+          Visited.erase(PredList[i].first);
+
         goto PredTranslationFailure;
       }
-
+    }
+
+    // Actually process results here; this needs to be a separate loop to avoid
+    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+    // any results for.  (getNonLocalPointerDepFromBB will modify our
+    // datastructures in ways the code after the PredTranslationFailure label
+    // doesn't expect.)
+    for (unsigned i = 0; i < PredList.size(); i++) {
+      BasicBlock *Pred = PredList[i].first;
+      PHITransAddr &PredPointer = PredList[i].second;
+      Value *PredPtrVal = PredPointer.getAddr();
+
+      bool CanTranslate = true;
       // If PHI translation was unable to find an available pointer in this
       // predecessor, then we have to assume that the pointer is clobbered in
       // that predecessor.  We can still do PRE of the load, which would insert
       // a computation of the pointer in this predecessor.
-      if (PredPtrVal == 0) {
+      if (PredPtrVal == 0)
+        CanTranslate = false;
+
+      // FIXME: it is entirely possible that PHI translating will end up with
+      // the same value.  Consider PHI translating something like:
+      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
+      // to recurse here, pedantically speaking.
+
+      // If getNonLocalPointerDepFromBB fails here, that means the cached
+      // result conflicted with the Visited list; we have to conservatively
+      // assume a clobber, but this also does not block PRE of the load.
+      if (!CanTranslate ||
+          getNonLocalPointerDepFromBB(PredPointer,
+                                      Loc.getWithNewPtr(PredPtrVal),
+                                      isLoad, Pred,
+                                      Result, Visited)) {
         // Add the entry to the Result list.
         NonLocalDepResult Entry(Pred,
                                 MemDepResult::getClobber(Pred->getTerminator()),
@@ -1058,19 +1112,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         NLPI.Pair = BBSkipFirstBlockPair();
         continue;
       }
-
-      // FIXME: it is entirely possible that PHI translating will end up with
-      // the same value.  Consider PHI translating something like:
-      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
-      // to recurse here, pedantically speaking.
-
-      // If we have a problem phi translating, fall through to the code below
-      // to handle the failure condition.
-      if (getNonLocalPointerDepFromBB(PredPointer,
-                                      Loc.getWithNewPtr(PredPointer.getAddr()),
-                                      isLoad, Pred,
-                                      Result, Visited))
-        goto PredTranslationFailure;
     }

     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
@@ -1087,6 +1128,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     continue;

   PredTranslationFailure:
+    // The following code is "failure"; we can't produce a sane translation
+    // for the given block.  It assumes that we haven't modified any of
+    // our datastructures while processing the current block.

     if (Cache == 0) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
@@ -1117,8 +1161,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     assert(I->getResult().isNonLocal() &&
            "Should only be here with transparent block");
-    I->setResult(MemDepResult::getClobber(BB->begin()));
-    ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+    I->setResult(MemDepResult::getClobber(BB->getTerminator()));
+    ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
     Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
                                        Pointer.getAddr()));
     break;
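The cleanup loops added above implement a rollback: entries speculatively inserted into Visited while processing the current block are erased before jumping to PredTranslationFailure, so the failure path sees the map exactly as it was on entry (which the new comment at that label now requires). The general shape of that pattern, sketched with std containers and hypothetical names:

#include <map>
#include <vector>

// Speculatively record visits for a batch of predecessors; if any one
// conflicts, erase exactly the entries this batch added before taking
// the failure path, leaving all earlier state untouched.
bool visitPreds(const std::vector<int> &Preds, int Addr,
                std::map<int, int> &Visited) {
  std::vector<int> Added;                 // Rollback log for this batch.
  for (int Pred : Preds) {
    auto Ins = Visited.insert({Pred, Addr});
    if (Ins.second) {
      Added.push_back(Pred);              // New entry: remember it.
      continue;
    }
    if (Ins.first->second != Addr) {      // Conflict: same block, new addr.
      for (int P : Added)                 // Undo only our own insertions.
        Visited.erase(P);
      return false;                       // Caller's "PredTranslationFailure".
    }
  }
  return true;
}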
+// The recurrence AR has been shown to have no signed wrap.  Typically, if we
+// can prove NSW for AR, then we can just as easily prove NSW for its
+// preincrement or postincrement sibling.  This allows normalizing a
+// sign-extended AddRec as such: {sext(Step + Start),+,Step} =>
+// {Step + sext(Start),+,Step}.  As a result, the expression
+// "Step + sext(PreIncAR)" is congruent with "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const Loop *L = AR->getLoop();
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Check for a simple-looking step prior to loop entry.
+  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+  if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+    return 0;
+
+  // This is a postinc AR.  Check for overflow on the preinc recurrence using
+  // the same three conditions that getSignExtendExpr checks.
+
+  // 1. NSW flags on the step increment.
+  const SCEV *PreStart = SA->getOperand(1);
+  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+    SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+    return PreStart;
+
+  // 2. Direct overflow check on the step operation's expression.
+  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+  const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+  const SCEV *OperandExtendedStart =
+    SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+                   SE->getSignExtendExpr(Step, WideTy));
+  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+    // Cache knowledge of PreAR NSW.
+    if (PreAR)
+      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+    // FIXME: this optimization needs a unit test
+    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+    return PreStart;
+  }
+
+  // 3. Loop precondition.
+  ICmpInst::Predicate Pred;
+  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+  if (OverflowLimit &&
+      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+    return PreStart;
+  }
+  return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+  if (!PreStart)
+    return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+                        SE->getSignExtendExpr(PreStart, Ty));
+}
+
 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                                const Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
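The congruence the new helpers rely on can be checked numerically: when the narrow recurrence never overflows, sign-extending the post-increment recurrence equals Step plus the sign-extended pre-increment recurrence at every iteration. A small standalone verification with illustrative values of my own choosing (PreStart = -20, Step = 3; not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const int8_t PreStart = -20, Step = 3;
  const int8_t PostStart = PreStart + Step;  // postinc AR starts one step later
  int8_t Pre = PreStart;                     // {PreStart,+,Step} in 8 bits
  int16_t WidePost = PostStart;              // sext({PostStart,+,Step}) in 16 bits
  for (int i = 0; i < 40; ++i) {             // 40 iterations: no 8-bit overflow
    // sext(PostIncAR) == Step + sext(PreIncAR), iteration by iteration.
    assert(WidePost == static_cast<int16_t>(Step) + static_cast<int16_t>(Pre));
    Pre = static_cast<int8_t>(Pre + Step);
    WidePost = static_cast<int16_t>(WidePost + Step);
  }
  return 0;
}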
@@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   // If we have special knowledge that this addrec won't overflow,
   // we don't need to do any further analysis.
   if (AR->getNoWrapFlags(SCEV::FlagNSW))
-    return getAddRecExpr(getSignExtendExpr(Start, Ty),
+    return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                          getSignExtendExpr(Step, Ty),
                          L, SCEV::FlagNSW);
 
@@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
+          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                getSignExtendExpr(Step, Ty),
                                L, AR->getNoWrapFlags());
         }
@@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
+          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                getZeroExtendExpr(Step, Ty),
                                L, AR->getNoWrapFlags());
         }
@@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
       // the addrec is safe.  Also, if the entry is guarded by a comparison
       // with the start value and the backedge is guarded by a comparison
       // with the post-inc value, the addrec is safe.
-      if (isKnownPositive(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMax());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
-      } else if (isKnownNegative(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMin());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
+      ICmpInst::Predicate Pred;
+      const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+      if (OverflowLimit &&
+          (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+           (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+            isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+                                        OverflowLimit)))) {
+        // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+                             getSignExtendExpr(Step, Ty),
+                             L, AR->getNoWrapFlags());
+      }
     }
   }
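The loop-guard case is the interesting one: if the backedge condition keeps every in-loop value of the recurrence below the overflow limit, the increment can never wrap, so sign extension commutes with the addition. A concrete 8-bit loop demonstrating this, entirely my own example; the guard i < Limit plays the role of isLoopBackedgeGuardedByCond with ICMP_SLT:

#include <cassert>
#include <cstdint>

int main() {
  const int8_t Step = 3;                       // known-positive step
  const int8_t Limit = static_cast<int8_t>(    // 125, per getOverflowLimitForStep
      static_cast<uint8_t>(INT8_MIN) - static_cast<uint8_t>(Step));
  for (int8_t i = -100; i < Limit; i = static_cast<int8_t>(i + Step)) {
    // Backedge guard: i SLT Limit here, so i + Step <= INT8_MAX and the
    // sext of this recurrence to 16 bits commutes with the addition.
    assert(static_cast<int16_t>(static_cast<int8_t>(i + Step)) ==
           static_cast<int16_t>(i) + Step);
  }
  return 0;
}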
@@ -3783,7 +3854,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   // update the value. The temporary CouldNotCompute value tells SCEV
   // code elsewhere that it shouldn't attempt to request a new
   // backedge-taken count, which could result in infinite recursion.
-  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
     BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
   if (!Pair.second)
     return Pair.first->second;
@@ -4433,7 +4504,7 @@ Constant *
 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                    const APInt &BEs,
                                                    const Loop *L) {
-  std::map<PHINode*, Constant*>::const_iterator I =
+  DenseMap<PHINode*, Constant*>::const_iterator I =
     ConstantEvolutionLoopExitValue.find(PN);
   if (I != ConstantEvolutionLoopExitValue.end())
     return I->second;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 8f18dd2..dab5aeb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -131,8 +131,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     }
     return;
   }
+
+  if (Argument *A = dyn_cast<Argument>(V)) {
+    // Get alignment information off byval arguments if specified in the IR.
+    if (A->hasByValAttr())
+      if (unsigned Align = A->getParamAlignment())
+        KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+                                                CountTrailingZeros_32(Align));
+    return;
+  }
 
-  KnownZero.clearAllBits(); KnownOne.clearAllBits();  // Start out not knowing anything.
+  // Start out not knowing anything.
+  KnownZero.clearAllBits(); KnownOne.clearAllBits();
 
   if (Depth == MaxDepth || Mask == 0)
     return;  // Limit search depth.
@@ -670,6 +680,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
         KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
         break;
       }
+      case Intrinsic::x86_sse42_crc32_64_8:
+      case Intrinsic::x86_sse42_crc32_64_64:
+        KnownZero = APInt::getHighBitsSet(64, 32);
+        break;
       }
     }
     break;
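The byval change rests on a simple bit-level fact: a pointer with alignment Align has log2(Align) trailing zero bits, which is exactly what CountTrailingZeros_32 feeds into getLowBitsSet. A quick standalone check of my own, using GCC/Clang's __builtin_ctz as a stand-in for LLVM's helper and an assumed alignment of 16:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Align = 16;                         // assumed byval alignment
  const unsigned TrailingZeros = __builtin_ctz(Align);        // log2(16) == 4
  const uint64_t LowBitsMask = (1ULL << TrailingZeros) - 1;   // 0xF
  assert(TrailingZeros == 4);
  // Every address that is a multiple of 16 has its low four bits clear, so
  // those bits of the pointer value are known zero.
  for (uint64_t Addr = 0; Addr < 1024; Addr += Align)
    assert((Addr & LowBitsMask) == 0);
  return 0;
}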