Diffstat (limited to 'lib/Analysis')
45 files changed, 4094 insertions, 2478 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index bd132c0..95c834b 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -440,3 +440,19 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index d947220..9f219f5 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -127,9 +127,8 @@ AliasAnalysis::AliasResult AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); - const char *AliasString; + const char *AliasString = 0; switch (R) { - default: llvm_unreachable("Unknown alias type!"); case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; case PartialAlias: Partial++; AliasString = "Partial alias"; break; @@ -154,9 +153,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); - const char *MRString; + const char *MRString = 0; switch (R) { - default: llvm_unreachable("Unknown mod/ref type!"); case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; case Mod: JustMod++; MRString = "JustMod"; break; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 37271b9..ac72983 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -193,8 +193,6 @@ bool AAEval::runOnFunction(Function &F) { case AliasAnalysis::MustAlias: PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; - default: - errs() << "Unknown alias query result!\n"; } } } @@ -223,8 +221,6 @@ bool AAEval::runOnFunction(Function &F) { case AliasAnalysis::ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; - default: - errs() << "Unknown alias query result!\n"; } } } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 3fcd3b5..f80e2fb 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -189,7 +189,9 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) != + if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(), + I.getSize(), + I.getTBAAInfo())) != AliasAnalysis::NoModRef) return true; diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index af400ba..20ecfd2 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -42,22 +42,6 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -/// 
isKnownNonNull - Return true if we know that the specified value is never -/// null. -static bool isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa<AllocaInst>(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} - /// isNonEscapingLocalObject - Return true if the pointer is to a function-local /// object that never escapes from the function. static bool isNonEscapingLocalObject(const Value *V) { @@ -100,42 +84,59 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. -static uint64_t getObjectSize(const Value *V, const TargetData &TD) { +static uint64_t getObjectSize(const Value *V, const TargetData &TD, + bool RoundToAlign = false) { Type *AccessTy; + unsigned Align; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { if (!GV->hasDefinitiveInitializer()) return AliasAnalysis::UnknownSize; AccessTy = GV->getType()->getElementType(); + Align = GV->getAlignment(); } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { if (!AI->isArrayAllocation()) AccessTy = AI->getType()->getElementType(); else return AliasAnalysis::UnknownSize; + Align = AI->getAlignment(); } else if (const CallInst* CI = extractMallocCall(V)) { - if (!isArrayMalloc(V, &TD)) + if (!RoundToAlign && !isArrayMalloc(V, &TD)) // The size is the argument to the malloc call. if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) return C->getZExtValue(); return AliasAnalysis::UnknownSize; } else if (const Argument *A = dyn_cast<Argument>(V)) { - if (A->hasByValAttr()) + if (A->hasByValAttr()) { AccessTy = cast<PointerType>(A->getType())->getElementType(); - else + Align = A->getParamAlignment(); + } else { return AliasAnalysis::UnknownSize; + } } else { return AliasAnalysis::UnknownSize; } - - if (AccessTy->isSized()) - return TD.getTypeAllocSize(AccessTy); - return AliasAnalysis::UnknownSize; + + if (!AccessTy->isSized()) + return AliasAnalysis::UnknownSize; + + uint64_t Size = TD.getTypeAllocSize(AccessTy); + // If there is an explicitly specified alignment, and we need to + // take alignment into account, round up the size. (If the alignment + // is implicit, getTypeAllocSize is sufficient.) + if (RoundToAlign && Align) + Size = RoundUpToAlignment(Size, Align); + + return Size; } /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, const TargetData &TD) { - uint64_t ObjectSize = getObjectSize(V, TD); + // This function needs to use the aligned object size because we allow + // reads a bit past the end given sufficient alignment. + uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -706,8 +707,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. 
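Note: the RoundToAlign path added to getObjectSize above rounds the allocation size up to its explicit alignment via llvm::RoundUpToAlignment. A minimal standalone sketch of that arithmetic (the helper name here is hypothetical):

    // Round Value up to the next multiple of Align; Align is assumed nonzero.
    static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

So a 6-byte global with align 4 is treated as 8 bytes by isObjectSmallerThan, matching the comment that sufficiently aligned objects may be read a bit past their end.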
if (!(*CI)->getType()->isPointerTy() || - (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && - !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -978,10 +978,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the // practical effect of this is protecting TBAA in the case of dynamic - // indices into arrays of unions. An alternative way to solve this would - // be to have clang emit extra metadata for unions and/or union accesses. - // A union-specific solution wouldn't handle the problem for malloc'd - // memory however. + // indices into arrays of unions or malloc'd memory. return PartialAlias; } diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index d16665f..8a660f7 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -58,6 +58,6 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { /// that we should not rely on the value itself, but only on the comparison to /// the other block frequencies. We do this to avoid using of floating points. /// -BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const { +BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI->getBlockFreq(BB); } diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index bde3b76..2730ce6 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -12,11 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -29,121 +32,118 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", char BranchProbabilityInfo::ID = 0; -namespace { -// Please note that BranchProbabilityAnalysis is not a FunctionPass. -// It is created by BranchProbabilityInfo (which is a FunctionPass), which -// provides a clear interface. Thanks to that, all heuristics and other -// private methods are hidden in the .cpp file. -class BranchProbabilityAnalysis { - - typedef std::pair<const BasicBlock *, const BasicBlock *> Edge; - - DenseMap<Edge, uint32_t> *Weights; - - BranchProbabilityInfo *BP; - - LoopInfo *LI; - - - // Weights are for internal use only. They are used by heuristics to help to - // estimate edges' probability. Example: - // - // Using "Loop Branch Heuristics" we predict weights of edges for the - // block BB2. - // ... 
- // | - // V - // BB1<-+ - // | | - // | | (Weight = 124) - // V | - // BB2--+ - // | - // | (Weight = 4) - // V - // BB3 - // - // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 - // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 - - static const uint32_t LBH_TAKEN_WEIGHT = 124; - static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - - static const uint32_t RH_TAKEN_WEIGHT = 24; - static const uint32_t RH_NONTAKEN_WEIGHT = 8; - - static const uint32_t PH_TAKEN_WEIGHT = 20; - static const uint32_t PH_NONTAKEN_WEIGHT = 12; - - static const uint32_t ZH_TAKEN_WEIGHT = 20; - static const uint32_t ZH_NONTAKEN_WEIGHT = 12; - - // Standard weight value. Used when none of the heuristics set weight for - // the edge. - static const uint32_t NORMAL_WEIGHT = 16; - - // Minimum weight of an edge. Please note, that weight is NEVER 0. - static const uint32_t MIN_WEIGHT = 1; - - // Return TRUE if BB leads directly to a Return Instruction. - static bool isReturningBlock(BasicBlock *BB) { - SmallPtrSet<BasicBlock *, 8> Visited; - - while (true) { - TerminatorInst *TI = BB->getTerminator(); - if (isa<ReturnInst>(TI)) - return true; - - if (TI->getNumSuccessors() > 1) - break; - - // It is unreachable block which we can consider as a return instruction. - if (TI->getNumSuccessors() == 0) - return true; - - Visited.insert(BB); - BB = TI->getSuccessor(0); +// Weights are for internal use only. They are used by heuristics to help to +// estimate edges' probability. Example: +// +// Using "Loop Branch Heuristics" we predict weights of edges for the +// block BB2. +// ... +// | +// V +// BB1<-+ +// | | +// | | (Weight = 124) +// V | +// BB2--+ +// | +// | (Weight = 4) +// V +// BB3 +// +// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 +// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 +static const uint32_t LBH_TAKEN_WEIGHT = 124; +static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + +/// \brief Unreachable-terminating branch taken weight. +/// +/// This is the weight for a branch being taken to a block that terminates +/// (eventually) in unreachable. These are predicted as unlikely as possible. +static const uint32_t UR_TAKEN_WEIGHT = 1; + +/// \brief Unreachable-terminating branch not-taken weight. +/// +/// This is the weight for a branch not being taken toward a block that +/// terminates (eventually) in unreachable. Such a branch is essentially never +/// taken. Set the weight to an absurdly high value so that nested loops don't +/// easily subsume it. +static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; + +static const uint32_t PH_TAKEN_WEIGHT = 20; +static const uint32_t PH_NONTAKEN_WEIGHT = 12; + +static const uint32_t ZH_TAKEN_WEIGHT = 20; +static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + +static const uint32_t FPH_TAKEN_WEIGHT = 20; +static const uint32_t FPH_NONTAKEN_WEIGHT = 12; + +// Standard weight value. Used when none of the heuristics set weight for +// the edge. +static const uint32_t NORMAL_WEIGHT = 16; + +// Minimum weight of an edge. Please note, that weight is NEVER 0. +static const uint32_t MIN_WEIGHT = 1; + +static uint32_t getMaxWeightFor(BasicBlock *BB) { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); +} - // Stop if cycle is detected. - if (Visited.count(BB)) - return false; - } +/// \brief Calculate edge weights for successors lead to unreachable. +/// +/// Predict that a successor which leads necessarily to an +/// unreachable-terminated block as extremely unlikely. 
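Note: with these constants, a two-way branch into an unreachable-terminated successor receives probability UR_TAKEN_WEIGHT / (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) = 1 / (1 + (1024*1024 - 1)) = 1/1048576, roughly 0.0001%, while the reachable edge keeps the remaining 1048575/1048576.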
+bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(TI)) + PostDominatedByUnreachable.insert(BB); return false; } - uint32_t getMaxWeightFor(BasicBlock *BB) const { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); - } + SmallPtrSet<BasicBlock *, 4> UnreachableEdges; + SmallPtrSet<BasicBlock *, 4> ReachableEdges; -public: - BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W, - BranchProbabilityInfo *BP, LoopInfo *LI) - : Weights(W), BP(BP), LI(LI) { + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + if (PostDominatedByUnreachable.count(*I)) + UnreachableEdges.insert(*I); + else + ReachableEdges.insert(*I); } - // Metadata Weights - bool calcMetadataWeights(BasicBlock *BB); + // If all successors are in the set of blocks post-dominated by unreachable, + // this block is too. + if (UnreachableEdges.size() == TI->getNumSuccessors()) + PostDominatedByUnreachable.insert(BB); - // Return Heuristics - bool calcReturnHeuristics(BasicBlock *BB); - - // Pointer Heuristics - bool calcPointerHeuristics(BasicBlock *BB); - - // Loop Branch Heuristics - bool calcLoopBranchHeuristics(BasicBlock *BB); + // Skip probabilities if this block has a single successor or if all were + // reachable. + if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) + return false; - // Zero Heurestics - bool calcZeroHeuristics(BasicBlock *BB); + uint32_t UnreachableWeight = + std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT); + for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, UnreachableWeight); + + if (ReachableEdges.empty()) + return true; + uint32_t ReachableWeight = + std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT); + for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ReachableWeight); - bool runOnFunction(Function &F); -}; -} // end anonymous namespace + return true; +} // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. -bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { +bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; @@ -174,54 +174,14 @@ bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); return true; } -// Calculate Edge Weights using "Return Heuristics". Predict a successor which -// leads directly to Return Instruction will not be taken. 
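Note how the weights above are split and clamped when a block has several successors of each kind: with two unreachable-terminated successors, std::max(UR_TAKEN_WEIGHT / 2, MIN_WEIGHT) is std::max(0u, 1u) = 1 in integer arithmetic, so each edge still gets the minimum weight of 1; the reachable edges likewise divide UR_NONTAKEN_WEIGHT between them but never drop below NORMAL_WEIGHT.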
-bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ - if (BB->getTerminator()->getNumSuccessors() == 1) - return false; - - SmallPtrSet<BasicBlock *, 4> ReturningEdges; - SmallPtrSet<BasicBlock *, 4> StayEdges; - - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - if (isReturningBlock(Succ)) - ReturningEdges.insert(Succ); - else - StayEdges.insert(Succ); - } - - if (uint32_t numStayEdges = StayEdges.size()) { - uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges; - if (stayWeight < NORMAL_WEIGHT) - stayWeight = NORMAL_WEIGHT; - - for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(), - E = StayEdges.end(); I != E; ++I) - BP->setEdgeWeight(BB, *I, stayWeight); - } - - if (uint32_t numRetEdges = ReturningEdges.size()) { - uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges; - if (retWeight < MIN_WEIGHT) - retWeight = MIN_WEIGHT; - for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(), - E = ReturningEdges.end(); I != E; ++I) { - BP->setEdgeWeight(BB, *I, retWeight); - } - } - - return ReturningEdges.size() > 0; -} - // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. -bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -249,16 +209,14 @@ bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. -bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { - uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); - +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { Loop *L = LI->getLoopFor(BB); if (!L) return false; @@ -267,17 +225,13 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { SmallPtrSet<BasicBlock *, 8> ExitingEdges; SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop. 
- bool isHeader = BB == L->getHeader(); - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - Loop *SuccL = LI->getLoopFor(Succ); - if (SuccL != L) - ExitingEdges.insert(Succ); - else if (Succ == L->getHeader()) - BackEdges.insert(Succ); - else if (isHeader) - InEdges.insert(Succ); + if (!L->contains(*I)) + ExitingEdges.insert(*I); + else if (L->getHeader() == *I) + BackEdges.insert(*I); + else + InEdges.insert(*I); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -288,7 +242,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, Back, backWeight); } } @@ -300,27 +254,26 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, Back, inWeight); } } - uint32_t numExitingEdges = ExitingEdges.size(); - if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (uint32_t numExitingEdges = ExitingEdges.size()) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { BasicBlock *Exiting = *EI; - BP->setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, Exiting, exitWeight); } } return true; } -bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -375,45 +328,94 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); return true; } +bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; -bool BranchProbabilityAnalysis::runOnFunction(Function &F) { - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *BB = I++; - - if (calcMetadataWeights(BB)) - continue; + Value *Cond = BI->getCondition(); + FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond); + if (!FCmp) + return false; - if (calcLoopBranchHeuristics(BB)) - continue; + bool isProb; + if (FCmp->isEquality()) { + // f1 == f2 -> Unlikely + // f1 != f2 -> Likely + isProb = !FCmp->isTrueWhenEqual(); + } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { + // !isnan -> Likely + isProb = true; + } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { + // isnan -> Unlikely + isProb = false; + } else { + return false; + } - if (calcReturnHeuristics(BB)) - continue; + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); - if (calcPointerHeuristics(BB)) - continue; + if (!isProb) + std::swap(Taken, NonTaken); - calcZeroHeuristics(BB); - } + setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, 
NonTaken, FPH_NONTAKEN_WEIGHT); - return false; + return true; } void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + AU.setPreservesAll(); } bool BranchProbabilityInfo::runOnFunction(Function &F) { - LoopInfo &LI = getAnalysis<LoopInfo>(); - BranchProbabilityAnalysis BPA(&Weights, this, &LI); - return BPA.runOnFunction(F); + LastF = &F; // Store the last function we ran on for printing. + LI = &getAnalysis<LoopInfo>(); + assert(PostDominatedByUnreachable.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), + E = po_end(&F.getEntryBlock()); + I != E; ++I) { + DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); + if (calcUnreachableHeuristics(*I)) + continue; + if (calcMetadataWeights(*I)) + continue; + if (calcLoopBranchHeuristics(*I)) + continue; + if (calcPointerHeuristics(*I)) + continue; + if (calcZeroHeuristics(*I)) + continue; + calcFloatingPointHeuristics(*I); + } + + PostDominatedByUnreachable.clear(); + return false; +} + +void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { + OS << "---- Branch Probabilities ----\n"; + // We print the probabilities from the last function the analysis ran over, + // or the function it is currently running over. + assert(LastF && "Cannot print prior to running over a function"); + for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); + BI != BE; ++BI) { + for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); + SI != SE; ++SI) { + printEdgeProbability(OS << " ", BI, *SI); + } + } } uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { @@ -434,12 +436,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { @@ -461,8 +459,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { } } - // FIXME: Use BranchProbability::compare. - if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + // Hot probability is at least 4/5 = 80% + if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; return 0; @@ -483,8 +481,8 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { void BranchProbabilityInfo:: setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { Weights[std::make_pair(Src, Dst)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " - << Dst->getNameStr() << " weight to " << Weight + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " + << Dst->getName() << " weight to " << Weight << (isEdgeHot(Src, Dst) ? 
" [is HOT now]\n" : "\n")); } @@ -499,11 +497,12 @@ getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { } raw_ostream & -BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, - BasicBlock *Dst) const { +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, + const BasicBlock *Src, + const BasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); - OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + OS << "edge " << Src->getName() << " -> " << Dst->getName() << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 7bb063f..7685400 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -111,7 +111,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -143,7 +143,7 @@ INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", /// being a 'dot' and 'gv' program in your path. /// void Function::viewCFG() const { - ViewGraph(this, "cfg" + getNameStr()); + ViewGraph(this, "cfg" + getName()); } /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -152,7 +152,7 @@ void Function::viewCFG() const { /// his can make the graph smaller. /// void Function::viewCFGOnly() const { - ViewGraph(this, "cfg" + getNameStr(), true); + ViewGraph(this, "cfg" + getName(), true); } FunctionPass *llvm::createCFGPrinterPass () { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index e79459d..2e3ec8b 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp + CodeMetrics.cpp ConstantFolding.cpp DIBuilder.cpp DbgInfoPrinter.cpp @@ -58,10 +59,4 @@ add_llvm_library(LLVMAnalysis ValueTracking.cpp ) -add_llvm_library_dependencies(LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - ) - add_subdirectory(IPA) diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index b2c27d1..dd33eeb 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -16,25 +16,35 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CallSite.h" +#include "llvm/Analysis/CaptureTracking.h" using namespace llvm; -/// As its comment mentions, PointerMayBeCaptured can be expensive. -/// However, it's not easy for BasicAA to cache the result, because -/// it's an ImmutablePass. To work around this, bound queries at a -/// fixed number of uses. -/// -/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep -/// a cache. Then we can move the code from BasicAliasAnalysis into -/// that path, and remove this threshold. 
-static int const Threshold = 20; +CaptureTracker::~CaptureTracker() {} + +namespace { + struct SimpleCaptureTracker : public CaptureTracker { + explicit SimpleCaptureTracker(bool ReturnCaptures) + : ReturnCaptures(ReturnCaptures), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { return true; } + + bool captured(Use *U) { + if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) + return false; + + Captured = true; + return true; + } + + bool ReturnCaptures; + + bool Captured; + }; +} /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -45,6 +55,26 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { + assert(!isa<GlobalValue>(V) && + "It doesn't make sense to ask whether a global is captured."); + + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + (void)StoreCaptures; + + SimpleCaptureTracker SCT(ReturnCaptures); + PointerMayBeCaptured(V, &SCT); + return SCT.Captured; +} + +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); SmallVector<Use*, Threshold> Worklist; SmallSet<Use*, Threshold> Visited; @@ -55,9 +85,10 @@ bool llvm::PointerMayBeCaptured(const Value *V, // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. if (Count++ >= Threshold) - return true; + return Tracker->tooManyUses(); Use *U = &UI.getUse(); + if (!Tracker->shouldExplore(U)) continue; Visited.insert(U); Worklist.push_back(U); } @@ -86,11 +117,10 @@ bool llvm::PointerMayBeCaptured(const Value *V, // (think of self-referential objects). CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); for (CallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) + if (A->get() == V && !CS.doesNotCapture(A - B)) // The parameter is not marked 'nocapture' - captured. - return true; - // Only passed via 'nocapture' arguments, or is the called function - not - // captured. + if (Tracker->captured(U)) + return; break; } case Instruction::Load: @@ -99,18 +129,11 @@ bool llvm::PointerMayBeCaptured(const Value *V, case Instruction::VAArg: // "va-arg" from a pointer does not cause it to be captured. break; - case Instruction::Ret: - if (ReturnCaptures) - return true; - break; case Instruction::Store: if (V == I->getOperand(0)) // Stored the pointer - conservatively assume it may be captured. - // TODO: If StoreCaptures is not true, we could do Fancy analysis - // to determine whether this store is not actually an escape point. - // In that case, BasicAliasAnalysis should be updated as well to - // take advantage of this. - return true; + if (Tracker->captured(U)) + return; // Storing to the pointee does not cause the pointer to be captured. 
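Note: PointerMayBeCaptured is now a thin client of the CaptureTracker callback interface introduced above, so other analyses can reuse the use-walking logic. A hedged sketch of a custom tracker (hypothetical; only the three hooks shown in this patch are assumed):

    namespace {
      // Counts capturing uses instead of stopping at the first one;
      // returning false from captured() tells the walker to keep going.
      struct CountingCaptureTracker : public CaptureTracker {
        unsigned NumCaptures;
        bool SawTooManyUses;
        CountingCaptureTracker() : NumCaptures(0), SawTooManyUses(false) {}
        void tooManyUses() { SawTooManyUses = true; }
        bool shouldExplore(Use *U) { return true; }
        bool captured(Use *U) { ++NumCaptures; return false; }
      };
    }
    // Usage: CountingCaptureTracker CT; PointerMayBeCaptured(V, &CT);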
break; case Instruction::BitCast: @@ -122,7 +145,8 @@ bool llvm::PointerMayBeCaptured(const Value *V, UI != UE; ++UI) { Use *U = &UI.getUse(); if (Visited.insert(U)) - Worklist.push_back(U); + if (Tracker->shouldExplore(U)) + Worklist.push_back(U); } break; case Instruction::ICmp: @@ -136,13 +160,16 @@ bool llvm::PointerMayBeCaptured(const Value *V, break; // Otherwise, be conservative. There are crazy ways to capture pointers // using comparisons. - return true; + if (Tracker->captured(U)) + return; + break; default: // Something else - be conservative and say it is captured. - return true; + if (Tracker->captured(U)) + return; + break; } } - // All uses examined - not captured. - return false; + // All uses examined. } diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp new file mode 100644 index 0000000..316e7bc9 --- /dev/null +++ b/lib/Analysis/CodeMetrics.cpp @@ -0,0 +1,184 @@ +//===- CodeMetrics.cpp - Code cost measurements ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements code cost measurement utilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { + if (isa<PHINode>(I)) + return true; + + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + return GEP->hasAllConstantIndices(); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't count as size. 
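Note: a small usage sketch for the new isInstructionFree helper above (the counting function is hypothetical; TD may be null, as in the signature):

    // Count the instructions in a block expected to lower to real
    // machine instructions.
    static unsigned countNonFreeInsts(const BasicBlock &BB,
                                      const TargetData *TD) {
      unsigned N = 0;
      for (BasicBlock::const_iterator I = BB.begin(), E = BB.end();
           I != E; ++I)
        if (!isInstructionFree(I, TD))
          ++N;
      return N;
    }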
+ return true; + } + } + + if (const CastInst *CI = dyn_cast<CastInst>(I)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI)) + return true; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (TD && isa<TruncInst>(CI) && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + return true; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + return true; + } + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetData *TD) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isInstructionFree(II, TD)) + continue; + + // Special handling for calls. + if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + ImmutableCallSite CS(cast<Instruction>(II)); + + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + ++NumInsts; + } + + if (isa<ReturnInst>(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa<IndirectBrInst>(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. 
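Note: the per-block accounting above, together with analyzeFunction just below, gives callers a one-shot cost summary. A hedged usage sketch (field and method names as they appear in this file; the threshold is illustrative):

    CodeMetrics Metrics;
    Metrics.analyzeFunction(F, TD);
    if (!Metrics.isRecursive && !Metrics.containsIndirectBr &&
        Metrics.NumInsts < 100) {
      // e.g. treat F as a cheap candidate for inlining or unrolling
    }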
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { + // If this function contains a call that "returns twice" (e.g., setjmp or + // _setjmp) and it isn't marked with "returns twice" itself, never inline it. + // This is a hack because we depend on the user marking their local variables + // as volatile if they are live across a setjmp call, and they probably + // won't do this in callers. + exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && + !F->hasFnAttr(Attribute::ReturnsTwice); + + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB, TD); +} diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index df79849..7a0a4e1e 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -26,6 +26,7 @@ #include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,42 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) return Constant::getAllOnesValue(DestTy); + // Handle a vector->integer cast. + if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { + ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + + unsigned NumSrcElts = CDV->getType()->getNumElements(); + + Type *SrcEltTy = CDV->getType()->getElementType(); + + // If the vector is a vector of floating point, convert it to vector of int + // to simplify things. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); + // Ask VMCore to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + CDV = cast<ConstantDataVector>(C); + } + + // Now that we know that the input value is a vector of integers, just shift + // and insert them into our result. + unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); + APInt Result(IT->getBitWidth(), 0); + for (unsigned i = 0; i != NumSrcElts; ++i) { + Result <<= BitShift; + if (TD.isLittleEndian()) + Result |= CDV->getElementAsInteger(NumSrcElts-i-1); + else + Result |= CDV->getElementAsInteger(i); + } + + return ConstantInt::get(IT, Result); + } + // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast<VectorType>(DestTy); if (DestVTy == 0) @@ -64,17 +101,16 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } // If this is a bitcast from constant vector -> vector, fold it. - ConstantVector *CV = dyn_cast<ConstantVector>(C); - if (CV == 0) + if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C)) return ConstantExpr::getBitCast(C, DestTy); // If the element types match, VMCore can fold it. 
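Worked example for the new vector-to-integer path above: folding bitcast (<2 x i32> <i32 1, i32 2> to i64) uses BitShift = 32; on a little-endian target the loop visits the elements in reverse, producing Result = (2 << 32) | 1 = 0x0000000200000001, while a big-endian target visits them in order and yields 0x0000000100000002.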
unsigned NumDstElt = DestVTy->getNumElements(); - unsigned NumSrcElt = CV->getNumOperands(); + unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - Type *SrcEltTy = CV->getType()->getElementType(); + Type *SrcEltTy = C->getType()->getVectorElementType(); Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which @@ -94,7 +130,6 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - if (!C) return ConstantExpr::getBitCast(C, DestTy); // Finally, VMCore can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); @@ -108,8 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CV = dyn_cast<ConstantVector>(C); - if (!CV) // If VMCore wasn't able to fold it, bail out. + // If VMCore wasn't able to fold it, bail out. + if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector. + !isa<ConstantDataVector>(C)) return C; } @@ -131,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Constant *Elt = Zero; unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { - Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++)); + Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); @@ -148,28 +184,29 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } Result.push_back(Elt); } - } else { - // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) - unsigned Ratio = NumDstElt/NumSrcElt; - unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + return ConstantVector::get(Result); + } + + // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); - // Loop over each source value, expanding into multiple results. - for (unsigned i = 0; i != NumSrcElt; ++i) { - Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i)); - if (!Src) // Reject constantexpr elements. - return ConstantExpr::getBitCast(C, DestTy); + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); - for (unsigned j = 0; j != Ratio; ++j) { - // Shift the piece of the value into the right place, depending on - // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - - // Truncate and remember this piece. 
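Worked example for the widening direction, using the case named in the comment: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) has Ratio = 2 and DstBitSize = 32; on a little-endian target each source element is shifted right by 0 and then by 32 before truncation, so i64 0 expands to <i32 0, i32 0> and i64 1 to <i32 1, i32 0>, giving <i32 0, i32 0, i32 1, i32 0> overall.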
- Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); - } + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } } @@ -272,7 +309,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return false; } - + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { const StructLayout *SL = TD.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); @@ -310,12 +347,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, // not reached. } - if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || + isa<ConstantDataSequential>(C)) { + Type *EltTy = cast<SequentialType>(C->getType())->getElementType(); + uint64_t EltSize = TD.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CA->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + uint64_t NumElts; + if (ArrayType *AT = dyn_cast<ArrayType>(C->getType())) + NumElts = AT->getNumElements(); + else + NumElts = cast<VectorType>(C->getType())->getNumElements(); + + for (; Index != NumElts; ++Index) { + if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; if (EltSize >= BytesLeft) @@ -327,30 +372,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return true; } - - if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); - uint64_t Index = ByteOffset / EltSize; - uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CV->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, - BytesLeft, TD)) - return false; - if (EltSize >= BytesLeft) - return true; - Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; - } - return true; - } - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr && CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, - BytesLeft, TD); + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); } // Otherwise, unknown initializer type. @@ -445,9 +472,9 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. - std::string Str; - if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { - unsigned StrLen = Str.length(); + StringRef Str; + if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) { + unsigned StrLen = Str.size(); Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); // Replace load with immediate integer if the result is an integer or fp @@ -542,8 +569,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// explicitly cast them so that they aren't implicitly casted by the /// getelementptr. 
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, - Type *ResultTy, - const TargetData *TD) { + Type *ResultTy, const TargetData *TD, + const TargetLibraryInfo *TLI) { if (!TD) return 0; Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); @@ -568,7 +595,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; return C; } @@ -576,10 +603,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, - Type *ResultTy, - const TargetData *TD) { + Type *ResultTy, const TargetData *TD, + const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; - if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) + if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() || + !Ptr->getType()->isPointerTy()) return 0; Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); @@ -602,7 +630,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Res = ConstantExpr::getSub(Res, CE->getOperand(1)); Res = ConstantExpr::getIntToPtr(Res, ResultTy); if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res)) - Res = ConstantFoldConstantExpression(ResCE, TD); + Res = ConstantFoldConstantExpression(ResCE, TD, TLI); return Res; } } @@ -729,7 +757,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. -Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { +Constant *llvm::ConstantFoldInstruction(Instruction *I, + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast<PHINode>(I)) { Constant *CommonValue = 0; @@ -765,7 +795,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], - TD); + TD, TLI); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return ConstantFoldLoadInst(LI, TD); @@ -781,28 +811,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { cast<Constant>(EVI->getAggregateOperand()), EVI->getIndices()); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { SmallVector<Constant*, 8> Ops; for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) { Constant *NewC = cast<Constant>(*i); // Recursively fold the ConstantExpr's operands. 
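Note: all of the folding entry points in this file now accept an optional TargetLibraryInfo, threaded through every recursive call. A hedged sketch of a caller, assuming the pass declares the usual dependency on the TargetLibraryInfo immutable pass:

    // In some FunctionPass::runOnFunction (hypothetical caller):
    const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
    if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
      I->replaceAllUsesWith(C);
      I->eraseFromParent();
    }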
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) - NewC = ConstantFoldConstantExpression(NewCE, TD); + NewC = ConstantFoldConstantExpression(NewCE, TD, TLI); Ops.push_back(NewC); } if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], - TD); - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD); + TD, TLI); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -817,7 +848,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef<Constant *> Ops, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) @@ -830,11 +862,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, switch (Opcode) { default: return 0; case Instruction::ICmp: - case Instruction::FCmp: assert(0 && "Invalid for compares"); + case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast<Function>(Ops.back())) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1)); + return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing @@ -888,9 +920,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, DestTy, TD)) + if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI)) return C; return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); @@ -903,7 +935,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, /// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *Ops0, Constant *Ops1, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y @@ -920,7 +953,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } // Only do this transformation if the int is intptrty in size, otherwise @@ -929,7 +962,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } } @@ -944,7 +977,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); - return 
ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); } // Only do this transformation if the int is intptrty in size, otherwise @@ -953,7 +986,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE0->getType() == IntPtrTy && CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD); + CE1->getOperand(0), TD, TLI); } } @@ -962,13 +995,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { Constant *LHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, + TD, TLI); Constant *RHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, + TD, TLI); unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; - return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD); + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); } } @@ -981,56 +1016,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, /// constant expression, or null if something is funny and we can't decide. Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { - if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! - + // Loop over all of the operands, tracking down which value we are - // addressing... - gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); - for (++I; I != E; ++I) - if (StructType *STy = dyn_cast<StructType>(*I)) { - ConstantInt *CU = cast<ConstantInt>(I.getOperand()); - assert(CU->getZExtValue() < STy->getNumElements() && - "Struct index out of range!"); - unsigned El = (unsigned)CU->getZExtValue(); - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { - C = CS->getOperand(El); - } else if (isa<ConstantAggregateZero>(C)) { - C = Constant::getNullValue(STy->getElementType(El)); - } else if (isa<UndefValue>(C)) { - C = UndefValue::get(STy->getElementType(El)); - } else { - return 0; - } - } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) { - if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) { - if (CI->getZExtValue() >= ATy->getNumElements()) - return 0; - if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) - C = CA->getOperand(CI->getZExtValue()); - else if (isa<ConstantAggregateZero>(C)) - C = Constant::getNullValue(ATy->getElementType()); - else if (isa<UndefValue>(C)) - C = UndefValue::get(ATy->getElementType()); - else - return 0; - } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) { - if (CI->getZExtValue() >= VTy->getNumElements()) - return 0; - if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) - C = CP->getOperand(CI->getZExtValue()); - else if (isa<ConstantAggregateZero>(C)) - C = Constant::getNullValue(VTy->getElementType()); - else if (isa<UndefValue>(C)) - C = UndefValue::get(VTy->getElementType()); - else - return 0; - } else { - return 0; - } - } else { - return 0; - } + // addressing. 
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { + C = C->getAggregateElement(CE->getOperand(i)); + if (C == 0) return 0; + } + return C; +} + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef<Constant*> Indices) { + // Loop over all of the operands, tracking down which value we are + // addressing. + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + C = C->getAggregateElement(Indices[i]); + if (C == 0) return 0; + } return C; } @@ -1045,6 +1054,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { case Intrinsic::sqrt: + case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: case Intrinsic::ctpop: @@ -1115,7 +1125,6 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); - return 0; // dummy return to suppress warning } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1132,7 +1141,6 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); - return 0; // dummy return to suppress warning } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1143,11 +1151,8 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), /// available for the result. Returns null if the conversion cannot be /// performed, otherwise returns the Constant value resulting from the /// conversion. -static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, - Type *Ty) { - assert(Op && "Called with NULL operand"); - APFloat Val(Op->getValueAPF()); - +static Constant *ConstantFoldConvertToInt(const APFloat &Val, + bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64bits. unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth(); assert(ResultWidth <= 64 && @@ -1168,7 +1173,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { +llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { if (!F->hasName()) return 0; StringRef Name = F->getName(); @@ -1183,6 +1189,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); } + if (!TLI) + return 0; if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; @@ -1201,43 +1209,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { Op->getValueAPF().convertToDouble(); switch (Name[0]) { case 'a': - if (Name == "acos") + if (Name == "acos" && TLI->has(LibFunc::acos)) return ConstantFoldFP(acos, V, Ty); - else if (Name == "asin") + else if (Name == "asin" && TLI->has(LibFunc::asin)) return ConstantFoldFP(asin, V, Ty); - else if (Name == "atan") + else if (Name == "atan" && TLI->has(LibFunc::atan)) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if (Name == "ceil") + if (Name == "ceil" && TLI->has(LibFunc::ceil)) return ConstantFoldFP(ceil, V, Ty); - else if (Name == "cos") + else if (Name == "cos" && TLI->has(LibFunc::cos)) return ConstantFoldFP(cos, V, Ty); - else if (Name == "cosh") + else if (Name == "cosh" && TLI->has(LibFunc::cosh)) return ConstantFoldFP(cosh, V, Ty); - else if (Name == "cosf") + else if (Name == "cosf" && TLI->has(LibFunc::cosf)) return ConstantFoldFP(cos, V, Ty); break; case 'e': - if (Name == "exp") + if (Name == "exp" && TLI->has(LibFunc::exp)) return ConstantFoldFP(exp, V, Ty); - if (Name == "exp2") { + if (Name == "exp2" && TLI->has(LibFunc::exp2)) { // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. return ConstantFoldBinaryFP(pow, 2.0, V, Ty); } break; case 'f': - if (Name == "fabs") + if (Name == "fabs" && TLI->has(LibFunc::fabs)) return ConstantFoldFP(fabs, V, Ty); - else if (Name == "floor") + else if (Name == "floor" && TLI->has(LibFunc::floor)) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if (Name == "log" && V > 0) + if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) return ConstantFoldFP(log, V, Ty); - else if (Name == "log10" && V > 0) + else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && (Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1248,21 +1256,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { } break; case 's': - if (Name == "sin") + if (Name == "sin" && TLI->has(LibFunc::sin)) return ConstantFoldFP(sin, V, Ty); - else if (Name == "sinh") + else if (Name == "sinh" && TLI->has(LibFunc::sinh)) return ConstantFoldFP(sinh, V, Ty); - else if (Name == "sqrt" && V >= 0) + else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sqrtf" && V >= 0) + else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sinf") + else if (Name == "sinf" && TLI->has(LibFunc::sinf)) return ConstantFoldFP(sin, V, Ty); break; case 't': - if (Name == "tan") + if (Name == "tan" && TLI->has(LibFunc::tan)) return ConstantFoldFP(tan, V, Ty); - else if (Name == "tanh") + else if (Name == "tanh" && TLI->has(LibFunc::tanh)) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1277,10 +1285,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); case Intrinsic::ctpop: 
return ConstantInt::get(Ty, Op->getValue().countPopulation()); - case Intrinsic::cttz: - return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); - case Intrinsic::ctlz: - return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); case Intrinsic::convert_from_fp16: { APFloat Val(Op->getValue()); @@ -1300,24 +1304,31 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { } } - if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) { + // Support ConstantVector in case we have an Undef in the top. + if (isa<ConstantVector>(Operands[0]) || + isa<ConstantDataVector>(Operands[0])) { + Constant *Op = cast<Constant>(Operands[0]); switch (F->getIntrinsicID()) { default: break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse2_cvtsd2si: case Intrinsic::x86_sse2_cvtsd2si64: - if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty); case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: - if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty); } } - + if (isa<UndefValue>(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; @@ -1337,16 +1348,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - + double Op2V = Ty->isFloatTy() ? (double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); - if (Name == "pow") + if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if (Name == "fmod") + } + if (!TLI) + return 0; + if (Name == "pow" && TLI->has(LibFunc::pow)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if (Name == "fmod" && TLI->has(LibFunc::fmod)) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if (Name == "atan2") + if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) @@ -1361,7 +1377,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { return 0; } - if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { switch (F->getIntrinsicID()) { @@ -1375,7 +1390,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { APInt Res; bool Overflow; switch (F->getIntrinsicID()) { - default: assert(0 && "Invalid case"); + default: llvm_unreachable("Invalid case"); case Intrinsic::sadd_with_overflow: Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); break; @@ -1401,6 +1416,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { }; return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); } + case Intrinsic::cttz: + // FIXME: This should check for Op2 == 1, and become unreachable if + // Op1 == 0. 
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); + case Intrinsic::ctlz: + // FIXME: This should check for Op2 == 1, and become unreachable if + // Op1 == 0. + return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index bfa429d..85913b1 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -17,6 +17,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" using namespace llvm; @@ -76,10 +77,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer) { - assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 - && "Invalid Language tag"); - assert (!Filename.empty() - && "Unable to create compile unit without filename"); + assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || + (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && + "Invalid Language tag"); + assert(!Filename.empty() && + "Unable to create compile unit without filename"); Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; TempEnumTypes = MDNode::getTemporary(VMContext, TElts); Value *THElts[] = { TempEnumTypes }; @@ -189,7 +191,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, Elts)); } -/// createQaulifiedType - Create debugging information entry for a qualified +/// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. @@ -358,13 +360,58 @@ DIType DIBuilder::createObjCIVar(StringRef Name, return DIType(MDNode::get(VMContext, Elts)); } +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, MDNode *PropertyNode) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(File), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + PropertyNode + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCProperty - Create debugging information entry for Objective-C +/// property. 
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, + DIFile File, unsigned LineNumber, + StringRef GetterName, + StringRef SetterName, + unsigned PropertyAttributes, + DIType Ty) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes), + Ty + }; + return DIObjCProperty(MDNode::get(VMContext, Elts)); +} + /// createClassType - Create debugging information entry for a class. DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, - MDNode *VTableHoder, MDNode *TemplateParams) { + MDNode *VTableHolder, MDNode *TemplateParams) { // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), @@ -379,7 +426,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DerivedFrom, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - VTableHoder, + VTableHolder, TemplateParams }; return DIType(MDNode::get(VMContext, Elts)); @@ -440,7 +487,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -465,7 +512,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -484,9 +531,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, ParameterTypes, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -500,7 +547,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - DIArray Elements) { + DIArray Elements) { // TAG_enumeration_type is encoded in DICompositeType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), @@ -512,7 +559,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -628,6 +675,31 @@ DIType DIBuilder::createTemporaryType(DIFile F) { return DIType(Node); } +/// createForwardDecl - Create a temporary forward-declared type that +/// can be RAUW'd if the full type is seen. +DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIFile F, + unsigned Line, unsigned RuntimeLang) { + // Create a temporary MDNode. + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + NULL, // TheCU + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + // To ease transition include sizes etc of 0. + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), + DIDescriptor::FlagFwdDecl), + NULL, + DIArray(), + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts); + return DIType(Node); +} + /// getOrCreateArray - Get a DIArray, create one if required. DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) { if (Elements.empty()) { @@ -738,7 +810,7 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, Elts.push_back(MDString::get(VMContext, Name)); Elts.push_back(F); Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), - (LineNo | (ArgNo << 24)))); + (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); @@ -754,6 +826,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, @@ -777,13 +850,14 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParams, Decl, - THolder + THolder, + ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine) }; MDNode *Node = MDNode::get(VMContext, Elts); @@ -831,7 +905,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Fn, TParam, llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - THolder + THolder, + // FIXME: Do we want to use a different scope lines? + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; MDNode *Node = MDNode::get(VMContext, Elts); return DISubprogram(Node); @@ -854,7 +930,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, /// createLexicalBlockFile - This creates a new MDNode that encapsulates /// an existing scope with a new filename. 
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, - DIFile File) { + DIFile File) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), Scope, diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 44457d3..f61a8f3 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt))) + if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt))) return CI->getZExtValue(); return 0; @@ -289,6 +289,10 @@ bool DIDescriptor::isEnumerator() const { return DbgNode && getTag() == dwarf::DW_TAG_enumerator; } +/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property. +bool DIDescriptor::isObjCProperty() const { + return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; +} //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// @@ -373,6 +377,19 @@ bool DICompileUnit::Verify() const { return true; } +/// Verify - Verify that an ObjC property is well formed. +bool DIObjCProperty::Verify() const { + if (!DbgNode) + return false; + unsigned Tag = getTag(); + if (Tag != dwarf::DW_TAG_APPLE_property) return false; + DIType Ty = getType(); + if (!Ty.Verify()) return false; + + // Don't worry about the rest of the strings for now. + return true; +} + /// Verify - Verify that a type descriptor is well formed. bool DIType::Verify() const { if (!DbgNode) @@ -482,6 +499,7 @@ bool DINameSpace::Verify() const { /// return base type size. uint64_t DIDerivedType::getOriginalTypeSize() const { unsigned Tag = getTag(); + if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { @@ -490,7 +508,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { // approach. if (!BaseType.isValid()) return getSizeInBits(); - if (BaseType.isDerivedType()) + // If this is a derived type, go ahead and get the base type, unless + // it's a reference, in which case it's just the size of the field. Pointer types + // have no need of this since they're a different type of qualification + // on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type) + return getSizeInBits(); + else if (BaseType.isDerivedType()) return DIDerivedType(BaseType).getOriginalTypeSize(); else return BaseType.getSizeInBits(); @@ -499,6 +523,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { return getSizeInBits(); } +/// getObjCProperty - Return property node, if this ivar is associated with one. +MDNode *DIDerivedType::getObjCProperty() const { + if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10) + return NULL; + return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)); +} + /// isInlinedFnArgument - Return true if this variable provides debugging /// information for an inlined function arguments.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) { @@ -565,8 +596,7 @@ StringRef DIScope::getFilename() const { return DIType(DbgNode).getFilename(); if (isFile()) return DIFile(DbgNode).getFilename(); - assert(0 && "Invalid DIScope!"); - return StringRef(); + llvm_unreachable("Invalid DIScope!"); } StringRef DIScope::getDirectory() const { @@ -586,8 +616,7 @@ StringRef DIScope::getDirectory() const { return DIType(DbgNode).getDirectory(); if (isFile()) return DIFile(DbgNode).getDirectory(); - assert(0 && "Invalid DIScope!"); - return StringRef(); + llvm_unreachable("Invalid DIScope!"); } DIArray DICompileUnit::getEnumTypes() const { @@ -632,6 +661,32 @@ DIArray DICompileUnit::getGlobalVariables() const { } //===----------------------------------------------------------------------===// +// DIDescriptor: vtable anchors for all descriptors. +//===----------------------------------------------------------------------===// + +void DIScope::anchor() { } + +void DICompileUnit::anchor() { } + +void DIFile::anchor() { } + +void DIType::anchor() { } + +void DIBasicType::anchor() { } + +void DIDerivedType::anchor() { } + +void DICompositeType::anchor() { } + +void DISubprogram::anchor() { } + +void DILexicalBlock::anchor() { } + +void DINameSpace::anchor() { } + +void DILexicalBlockFile::anchor() { } + +//===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. //===----------------------------------------------------------------------===// @@ -679,8 +734,13 @@ void DIType::print(raw_ostream &OS) const { if (isBasicType()) DIBasicType(DbgNode).print(OS); - else if (isDerivedType()) - DIDerivedType(DbgNode).print(OS); + else if (isDerivedType()) { + DIDerivedType DTy = DIDerivedType(DbgNode); + DTy.print(OS); + DICompositeType CTy = getDICompositeType(DTy); + if (CTy.Verify()) + CTy.print(OS); + } else if (isCompositeType()) DICompositeType(DbgNode).print(OS); else { @@ -698,7 +758,9 @@ void DIBasicType::print(raw_ostream &OS) const { /// print - Print derived type. void DIDerivedType::print(raw_ostream &OS) const { - OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); + OS << "\n\t Derived From: "; + getTypeDerivedFrom().print(OS); + OS << "\n\t"; } /// print - Print composite type. @@ -725,6 +787,9 @@ void DISubprogram::print(raw_ostream &OS) const { if (isDefinition()) OS << " [def] "; + if (getScopeLineNumber() != getLineNumber()) + OS << " [Scope: " << getScopeLineNumber() << "] "; + OS << "\n"; } @@ -927,9 +992,30 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { /// processModule - Process entire module and collect debug info. 
void DebugInfoFinder::processModule(Module &M) { - if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) - addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i))); + if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + addCompileUnit(CU); + if (CU.getVersion() > LLVMDebugVersion10) { + DIArray GVs = CU.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(GVs.getElement(i)); + if (addGlobalVariable(DIG)) + processType(DIG.getType()); + } + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + processSubprogram(DISubprogram(SPs.getElement(i))); + DIArray EnumTypes = CU.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + processType(DIType(EnumTypes.getElement(i))); + DIArray RetainedTypes = CU.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + processType(DIType(RetainedTypes.getElement(i))); + return; + } + } + } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 6de4e1e..1604576 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -35,6 +35,8 @@ namespace { }; } +void DominanceFrontier::anchor() { } + const DominanceFrontier::DomSetType & DominanceFrontier::calculate(const DominatorTree &DT, const DomTreeNode *Node) { diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index eae83fd..8ffef29 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -5,9 +5,3 @@ add_llvm_library(LLVMipa GlobalsModRef.cpp IPA.cpp ) - -add_llvm_library_dependencies(LLVMipa - LLVMAnalysis - LLVMCore - LLVMSupport - ) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 2e79eab..0df3e8a 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -127,16 +127,9 @@ private: } } - // Loop over all of the users of the function, looking for non-call uses. - for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ - User *U = *I; - if ((!isa<CallInst>(U) && !isa<InvokeInst>(U)) - || !CallSite(cast<Instruction>(U)).isCallee(I)) { - // Not a call, or being used as a parameter rather than as the callee. - ExternalCallingNode->addCalledFunction(CallSite(), Node); - break; - } - } + // If this function has its address taken, anything could call it. + if (F->hasAddressTaken()) + ExternalCallingNode->addCalledFunction(CallSite(), Node); // If this function is not defined in this translation unit, it could call // anything. 
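[Editor's note, not part of the diff] The CallGraph.cpp hunk above replaces a hand-written scan of F's use list with Function::hasAddressTaken(), which performs the same test: it returns true as soon as any use of F is something other than F being the direct callee of a call or invoke (stored, passed as a data argument, compared, and so on). A hedged sketch of the resulting check, with a hypothetical helper name and the usual LLVM headers assumed:

static bool needsExternalCallingEdge(const Function *F) {
  // A single escaping use means callers unknown to this module may reach F,
  // so F must be reachable from the external calling node of the call graph.
  return F->hasAddressTaken();
}

The removed loop treated a use of F as a plain call argument the same way; that is exactly an address-taken use, so the two formulations agree, while the new one can stop at the first escaping use.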
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index b226d66..c1d8e3e 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -467,6 +468,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } else if (isMalloc(&cast<Instruction>(*II)) || isFreeCall(&cast<Instruction>(*II))) { FunctionEffect |= ModRef; + } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) { + // The callgraph doesn't include intrinsic calls. + Function *Callee = Intrinsic->getCalledFunction(); + ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee); + FunctionEffect |= (Behaviour & ModRef); } if ((FunctionEffect & Mod) == 0) diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt new file mode 100644 index 0000000..980e918 --- /dev/null +++ b/lib/Analysis/IPA/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = IPA +parent = Libraries +library_name = ipa +required_libraries = Analysis Core Support diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index d0ca892..b80966b 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -79,10 +79,44 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, return false; } -/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// Return true if all loop headers that dominate this block are in simplified +/// form. +static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, + const LoopInfo *LI, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { + Loop *NearestLoop = 0; + for (DomTreeNode *Rung = DT->getNode(BB); + Rung; Rung = Rung->getIDom()) { + BasicBlock *DomBB = Rung->getBlock(); + Loop *DomLoop = LI->getLoopFor(DomBB); + if (DomLoop && DomLoop->getHeader() == DomBB) { + // If the domtree walk reaches a loop with no preheader, return false. + if (!DomLoop->isLoopSimplifyForm()) + return false; + // If we have already checked this loop nest, stop checking. + if (SimpleLoopNests.count(DomLoop)) + break; + // If we have not already checked this loop nest, remember the loop + // header nearest to BB. The nearest loop may not contain BB. + if (!NearestLoop) + NearestLoop = DomLoop; + } + } + if (NearestLoop) + SimpleLoopNests.insert(NearestLoop); + return true; +} + +/// AddUsersImpl - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. 
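/// (Editor's aside, not part of the patch: the SimpleLoopNests set threaded
/// through this routine is seeded once per top-level query; the wrapper
/// later in this hunk does, in effect,
///   SmallPtrSet<Loop*,16> SimpleLoopNests;
///   return AddUsersImpl(I, SimpleLoopNests);
/// so the domtree walk in isSimplifiedLoopNest above is memoized across the
/// whole recursive traversal instead of being repeated for every user.)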
-bool IVUsers::AddUsersIfInteresting(Instruction *I) { +bool IVUsers::AddUsersImpl(Instruction *I, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { + // Add this IV user to the Processed set before returning false to ensure that + // all IV users are members of the set. See IVUsers::isIVUserOrOperand. + if (!Processed.insert(I)) + return true; // Instruction already handled. + if (!SE->isSCEVable(I->getType())) return false; // Void and FP expressions cannot be reduced. @@ -93,9 +127,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (Width > 64 || (TD && !TD->isLegalInteger(Width))) return false; - if (!Processed.insert(I)) - return true; // Instruction already handled. - // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); @@ -115,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (isa<PHINode>(User) && Processed.count(User)) continue; + // Only consider IVUsers that are dominated by simplified loop + // headers. Otherwise, SCEVExpander will crash. + BasicBlock *UseBB = User->getParent(); + // A phi's use is live out of its predecessor block. + if (PHINode *PHI = dyn_cast<PHINode>(User)) { + unsigned OperandNo = UI.getOperandNo(); + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + UseBB = PHI->getIncomingBlock(ValNo); + } + if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests)) + return false; + // Descend recursively, but not into PHI nodes outside the current loop. // It's important to see the entire expression outside the loop to get // choices that depend on addressing mode use right, although we won't @@ -124,12 +167,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersImpl(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } - } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { + } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; @@ -153,6 +196,15 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; } +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + // SCEVExpander can only handle users that are dominated by simplified loop + // entries. Keep track of all loops that are only dominated by other simple + // loops so we don't traverse the domtree for each user. + SmallPtrSet<Loop*,16> SimpleLoopNests; + + return AddUsersImpl(I, SimpleLoopNests); +} + IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUses.push_back(new IVStrideUse(this, User, Operand)); return IVUses.back(); @@ -268,6 +320,7 @@ void IVStrideUse::transformToPostInc(const Loop *L) { void IVStrideUse::deleted() { // Remove this user from the list. + Parent->Processed.erase(this->getUser()); Parent->IVUses.erase(this); // this now dangles! 
} diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index e12e322..3e3d2ab 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -11,645 +11,1012 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/GlobalAlias.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(const Function *F) { - if (!F) return false; +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + +namespace { + +class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { + typedef InstVisitor<CallAnalyzer, bool> Base; + friend class InstVisitor<CallAnalyzer, bool>; + + // TargetData if available, or null. + const TargetData *const TD; + + // The called function. + Function &F; + + int Threshold; + int Cost; + const bool AlwaysInline; + + bool IsRecursive; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap<Value *, Constant *> SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap<Value *, Value *> SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap<Value *, int> SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; + + // Custom simplification helper routines. 
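  // [Editor's aside, not part of the patch] Rough driver flow, as a hedged
  // usage sketch of the public surface declared below (hedged reading: the
  // boolean result of analyzeCall is taken to mean the accumulated cost
  // stayed under the threshold):
  //
  //   CallAnalyzer CA(TD, *Callee, Threshold);
  //   bool ShouldInline = CA.analyzeCall(CS);
  //   DEBUG(CA.dump());
  //
  // analyzeCall seeds the maps above from the actual arguments at the call
  // site, then walks the reachable instructions through the visit overrides
  // and the helper routines grouped below.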
+ bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap<Value *, int>::iterator &CostIt); + void disableSROA(DenseMap<Value *, int>::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) + : TD(TD), F(Callee), Threshold(Threshold), Cost(0), + AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), + IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), + NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { + } - if (F->hasLocalLinkage()) return false; + bool analyzeCall(CallSite CS); - if (!F->hasName()) return false; + int getThreshold() { return Threshold; } + int getCost() { return Cost; } - StringRef Name = F->getName(); + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; + void dump(); +}; - // These are all likely to be optimized into something smaller. 
- if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; +} // namespace - return false; +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); } -/// analyzeBasicBlock - Fill in the current structure with information gleaned -/// from the specified block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetData *TD) { - ++NumBlocks; - unsigned NumInstsBeforeThisBB = NumInsts; - for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); - II != E; ++II) { - if (isa<PHINode>(II)) continue; // PHI nodes don't count. - - // Special handling for calls. - if (isa<CallInst>(II) || isa<InvokeInst>(II)) { - if (isa<DbgInfoIntrinsic>(II)) - continue; // Debug intrinsics don't count as size. - - ImmutableCallSite CS(cast<Instruction>(II)); - - if (const Function *F = CS.getCalledFunction()) { - // If a function is both internal and has a single use, then it is - // extremely likely to get inlined in the future (it was probably - // exposed by an interleaved devirtualization pass). - if (F->hasInternalLinkage() && F->hasOneUse()) - ++NumInlineCandidates; - - // If this call is to function itself, then the function is recursive. - // Inlining it into other functions is a bad idea, because this is - // basically just a form of loop peeling, and our metrics aren't useful - // for that case. - if (F == BB->getParent()) - isRecursive = true; - } +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; - if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; - // We don't want inline asm to count as a call - that would prevent loop - // unrolling. The argument setup cost is still real, though. - if (!isa<InlineAsm>(CS.getCalledValue())) - ++NumCalls; - } - } + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} - if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { - if (!AI->isStaticAlloca()) - this->usesDynamicAlloca = true; - } +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This markes the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} - if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) - ++NumVectorInsts; +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. 
+void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} - if (const CastInst *CI = dyn_cast<CastInst>(II)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || - isa<PtrToIntInst>(CI)) - continue; - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (isa<TruncInst>(CI) && TD && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - continue; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - continue; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (GEPI->hasAllConstantIndices()) - continue; +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast<ConstantInt>(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. 
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; } - ++NumInsts; + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } + return true; +} - if (isa<ReturnInst>(BB->getTerminator())) - ++NumRets; +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this indirect - // jump would jump from the inlined copy of the function into the original - // function which is extremely undefined behavior. - if (isa<IndirectBrInst>(BB->getTerminator())) - containsIndirectBr = true; + // We will happily inline static alloca instructions or dynamic alloca + // instructions in always-inline situations. + if (AlwaysInline || I.isStaticAlloca()) + return Base::visitAlloca(I); - // Remember NumInsts for this BB. - NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; } -// CountCodeReductionForConstant - Figure out an approximation for how many -// instructions will be constant folded if the specified value is constant. -// -unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { - // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(*U); - const unsigned NumSucc = TI.getNumSuccessors(); - unsigned Instrs = 0; - for (unsigned I = 0; I != NumSucc; ++I) - Instrs += NumBBInsts[TI.getSuccessor(I)]; - // We don't know which blocks will be eliminated, so use the average size. - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else { - // Figure out if this instruction will be removed due to simple constant - // propagation. - Instruction &Inst = cast<Instruction>(*U); - - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. - // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa<AllocaInst>(Inst)) - continue; +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. 
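  // [Editor's aside, not part of the patch] A concrete case the FIXME above
  // leaves on the table: for
  //   %p = phi i8* [ %arg, %entry ], [ %other, %dead.bb ]
  // where %dead.bb becomes unreachable once a constant argument folds its
  // branch, %p is really just %arg. With no tracking through phis, any
  // base+offset or SROA state attached to %arg stops propagating at this
  // node, so the analysis stays conservative here.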
- bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; - } + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. - if (AllOperandsConstant) { - // We will get to remove this instruction... - Reduction += InlineConstants::InstrCost; + // Phi nodes are always zero-cost. + return true; +} - // And any other instructions that use it which become constants - // themselves. - Reduction += CountCodeReductionForConstant(&Inst); +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; } } - return Reduction; -} -// CountCodeReductionForAlloca - Figure out an approximation of how much smaller -// the function will be if it is inlined into a context where an argument -// becomes an alloca. -// -unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { - if (!V->getType()->isPointerTy()) return 0; // Not a pointer - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *I = cast<Instruction>(*UI); - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - Reduction += InlineConstants::InstrCost; - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has variable indices, we won't be able to do much with it. - if (GEP->hasAllConstantIndices()) - Reduction += CountCodeReductionForAlloca(GEP); - } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { - // Track pointer through bitcasts. - Reduction += CountCodeReductionForAlloca(BCI); - } else { - // If there is some other strange instruction, we're not going to be able - // to do much if we inline this. - return 0; - } + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. + return true; } - return Reduction; + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; } -/// analyzeFunction - Fill in the current structure with information gleaned -/// from the specified function. -void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { - // If this function contains a call to setjmp or _setjmp, never inline - // it. 
This is a hack because we depend on the user marking their local - // variables as volatile if they are live across a setjmp call, and they - // probably won't do this in callers. - if (F->callsFunctionThatReturnsTwice()) - callsSetJmp = true; - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } -/// analyzeFunction - Fill in the current structure with information gleaned -/// from the specified function. -void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, - const TargetData *TD) { - Metrics.analyzeFunction(F, TD); - - // A function with exactly one return has it removed during the inlining - // process (see InlineFunction), so don't count it. - // FIXME: This knowledge should really be encoded outside of FunctionInfo. - if (Metrics.NumRets==1) - --Metrics.NumInsts; - - // Check out all of the arguments to the function, figuring out how much - // code can be eliminated if one of the arguments is a constant. - ArgumentWeights.reserve(F->arg_size()); - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), - Metrics.CountCodeReductionForAlloca(I))); + // Track base/offsets through casts + std::pair<Value *, APInt> BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. + return true; } -/// NeverInline - returns true if the function should never be inlined into -/// any caller -bool InlineCostAnalyzer::FunctionInfo::NeverInline() { - return (Metrics.callsSetJmp || Metrics.isRecursive || - Metrics.containsIndirectBr); +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair<Value *, APInt> BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. 
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+    SROAArgValues[&I] = SROAArg;
+
+  // A ptrtoint cast is free so long as the result is large enough to store the
+  // pointer, and a legal integer type.
+  return TD && TD->isLegalInteger(IntegerSize) &&
+         IntegerSize >= TD->getPointerSizeInBits();
 }

-// getSpecializationBonus - The heuristic used to determine the per-call
-// performance boost for using a specialization of Callee with argument
-// specializedArgNo replaced by a constant.
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
-         SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
-  if (Callee->mayBeOverridden())
-    return 0;
-  int Bonus = 0;
-  // If this function uses the coldcc calling convention, prefer not to
-  // specialize it.
-  if (Callee->getCallingConv() == CallingConv::Cold)
-    Bonus -= InlineConstants::ColdccPenalty;
-
-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
-
-  unsigned ArgNo = 0;
-  unsigned i = 0;
-  for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
-       I != E; ++I, ++ArgNo)
-    if (ArgNo == SpecializedArgNos[i]) {
-      ++i;
-      Bonus += CountBonusForConstant(I);
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+  // Propagate constants through inttoptr.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+      SimplifiedValues[&I] = C;
+      return true;
     }

-  // Calls usually take a long time, so they make the specialization gain
-  // smaller.
-  Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+  // Track base/offset pairs when round-tripped through a pointer without
+  // modifications provided the integer is not too large.
+  Value *Op = I.getOperand(0);
+  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+  if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+    if (BaseAndOffset.first)
+      ConstantOffsetPtrs[&I] = BaseAndOffset;
+  }
+
+  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+    SROAArgValues[&I] = SROAArg;

-  return Bonus;
+  // An inttoptr cast is free so long as the input is a legal integer type
+  // which doesn't contain values outside the range of a pointer.
+  return TD && TD->isLegalInteger(IntegerSize) &&
+         IntegerSize <= TD->getPointerSizeInBits();
 }

-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function.  We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth.  Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+  // Propagate constants through casts.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+      SimplifiedValues[&I] = C;
+      return true;
+    }

-  // This could just be NULL.
- if (!C) return 0; + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); - Function *F = dyn_cast<Function>(C); - if (!F) return 0; + // No-op casts don't have any cost. + if (I.isLosslessCast()) + return true; - int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); - return (Bonus > 0) ? 0 : Bonus; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (TD && isa<TruncInst>(I) && + TD->isLegalInteger(TD->getTypeSizeInBits(I.getType()))) + return true; + + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // no-ops on most sane targets. + if (isa<CmpInst>(I.getOperand(0))) + return true; + + // Assume the rest of the casts require work. + return false; } -// CountBonusForConstant - Figure out an approximation for how much per-call -// performance boost we can expect if the specified value is constant. -int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { - unsigned Bonus = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (CallInst *CI = dyn_cast<CallInst>(U)) { - // Turning an indirect call into a direct call is a BIG win - if (CI->getCalledValue() == V) - Bonus += ConstantFunctionBonus(CallSite(CI), C); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { - // Turning an indirect call into a direct call is a BIG win - if (II->getCalledValue() == V) - Bonus += ConstantFunctionBonus(CallSite(II), C); +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *Ops[1] = { dyn_cast<Constant>(Operand) }; + if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + Ops, TD)) { + SimplifiedValues[&I] = C; + return true; } - // FIXME: Eliminating conditional branches and switches should - // also yield a per-call performance boost. - else { - // Figure out the bonuses that wll accrue due to simple constant - // propagation. - Instruction &Inst = cast<Instruction>(*U); - - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. - // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa<AllocaInst>(Inst)) - continue; - bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; - } + // Disable any SROA on the argument to arbitrary unary operators. + disableSROA(Operand); - if (AllOperandsConstant) - Bonus += CountBonusForConstant(&Inst); + return false; +} + +bool CallAnalyzer::visitICmp(ICmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. 
+ if (!isa<Constant>(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa<Constant>(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; + } } } - return Bonus; -} + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa<ConstantPointerNull>(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } -int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - // InlineCost - This value measures how good of an inline candidate this call - // site is to inline. A lower inline cost make is more likely for the call to - // be inlined. This value may go negative. - // - int InlineCost = 0; - - // Compute any size reductions we can expect due to arguments being passed into - // the function. - // - unsigned ArgNo = 0; - CallSite::arg_iterator I = CS.arg_begin(); - for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); - FI != FE; ++I, ++FI, ++ArgNo) { - - // If an alloca is passed in, inlining this function is likely to allow - // significant future optimization possibilities (like scalar promotion, and - // scalarization), so encourage the inlining of the function. - // - if (isa<AllocaInst>(I)) - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; - - // If this is a constant being passed into the function, use the argument - // weights calculated for the callee to determine how much will be folded - // away with this information. 
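The common-base fold in visitICmp above reduces a pointer comparison to arithmetic on the two constant offsets. A rough standalone illustration, with uint64_t offsets standing in for APInt and only a few predicates modeled:

    #include <cstdint>

    enum Predicate { ICMP_EQ, ICMP_NE, ICMP_ULT };

    // Equality depends only on the offsets once the bases match; the ordered
    // form additionally relies on both pointers staying inbounds of the same
    // allocation, which the inbounds-GEP tracking is meant to guarantee.
    static bool foldCommonBaseICmp(Predicate Pred, std::uint64_t LHSOffset,
                                   std::uint64_t RHSOffset) {
      switch (Pred) {
      case ICMP_EQ:  return LHSOffset == RHSOffset;
      case ICMP_NE:  return LHSOffset != RHSOffset;
      case ICMP_ULT: return LHSOffset < RHSOffset;
      }
      return false; // Predicates not modeled in this sketch.
    }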
- else if (isa<Constant>(I)) - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; + disableSROA(CostIt); } - // Each argument passed in has a cost at both the caller and the callee - // sides. Measurements show that each argument costs about the same as an - // instruction. - InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); + return false; +} - // Now that we have considered all of the factors that make the call site more - // likely to be inlined, look at factors that make us not want to inline it. +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } + } + } - // Calls usually take a long time, so they make the inlining gain smaller. - InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); +} - // Look at the size of the callee. Each instruction counts as 5. - InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa<Constant>(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa<Constant>(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } - return InlineCost; -} + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. + disableSROA(LHS); + disableSROA(RHS); -int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - bool isDirectCall = CS.getCalledFunction() == Callee; - Instruction *TheCall = CS.getInstruction(); - int Bonus = 0; - - // If there is only one call of the function, and it has internal linkage, - // make it almost guaranteed to be inlined. - // - if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) - Bonus += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this. 
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { - if (isa<UnreachableInst>(II->getNormalDest()->begin())) - Bonus += InlineConstants::NoreturnPenalty; - } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) - Bonus += InlineConstants::NoreturnPenalty; - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (Callee->getCallingConv() == CallingConv::Cold) - Bonus += InlineConstants::ColdccPenalty; - - // Add to the inline quality for properties that make the call valuable to - // inline. This includes factors that indicate that the result of inlining - // the function will be optimizable. Currently this just looks at arguments - // passed into the function. - // - CallSite::arg_iterator I = CS.arg_begin(); - for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); - FI != FE; ++I, ++FI) - // Compute any constant bonus due to inlining we want to give here. - if (isa<Constant>(I)) - Bonus += CountBonusForConstant(FI, cast<Constant>(I)); - - return Bonus; + return false; } -// getInlineCost - The heuristic used to determine if we should inline the -// function call or not. -// -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet<const Function*, 16> &NeverInline) { - return getInlineCost(CS, CS.getCalledFunction(), NeverInline); -} +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - Function *Callee, - SmallPtrSet<const Function*, 16> &NeverInline) { - Instruction *TheCall = CS.getInstruction(); - Function *Caller = TheCall->getParent()->getParent(); + disableSROA(CostIt); + } - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || - CS.isNoInline()) - return llvm::InlineCost::getNever(); + return false; +} - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); + disableSROA(CostIt); + } - // If we should never inline this, return a huge cost. - if (CalleeFI->NeverInline()) - return InlineCost::getNever(); + return false; +} - // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we - // could move this up and avoid computing the FunctionInfo for - // things we are going to just return always inline for. This - // requires handling setjmp somewhere else, however. - if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) - return InlineCost::getAlways(); +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && + !F.hasFnAttr(Attribute::ReturnsTwice)) { + // This aborts the entire analysis. 
+ ExposesReturnsTwice = true; + return false; + } - if (CalleeFI->Metrics.usesDynamicAlloca) { - // Get information about the caller. - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // SROA can usually chew through these intrinsics and they have no cost + // so don't pay the price of analyzing them in detail. + return true; + } + } + + if (Function *F = CS.getCalledFunction()) { + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursive = true; + return false; + } - // If we haven't calculated this information yet, do so now. - if (CallerFI.Metrics.NumBlocks == 0) { - CallerFI.analyzeFunction(Caller, TD); + if (!callIsSmall(F)) { + // We account for the average 1 instruction per call argument setup + // here. + Cost += CS.arg_size() * InlineConstants::InstrCost; - // Recompute the CalleeFI pointer, getting Caller could have invalidated - // it. - CalleeFI = &CachedFunctionInfo[Callee]; + // Everything other than inline ASM will also have a significant cost + // merely from making the call. + if (!isa<InlineAsm>(CS.getCalledValue())) + Cost += InlineConstants::CallPenalty; } - // Don't inline a callee with dynamic alloca into a caller without them. - // Functions containing dynamic alloca's are inefficient in various ways; - // don't create more inefficiency. - if (!CallerFI.Metrics.usesDynamicAlloca) - return InlineCost::getNever(); + return Base::visitCallSite(CS); } - // InlineCost - This value measures how good of an inline candidate this call - // site is to inline. A lower inline cost make is more likely for the call to - // be inlined. This value may go negative due to the fact that bonuses - // are negative numbers. - // - int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee); - return llvm::InlineCost::get(InlineCost); + // Otherwise we're in a very special case -- an indirect function call. See + // if we can be particularly clever about this. + Value *Callee = CS.getCalledValue(); + + // First, pay the price of the argument setup. We account for the average + // 1 instruction per call argument setup here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Next, check if this happens to be an indirect function call to a known + // function in this inline context. If not, we've done all we can. + Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); + if (!F) + return Base::visitCallSite(CS); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan + // out. Pretend to inline the function, with a custom threshold. + CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); + if (CA.analyzeCall(CS)) { + // We were able to inline the indirect call! 
Subtract the cost from the
+    // bonus we want to apply, but don't go below zero.
+    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+  }
+
+  return Base::visitCallSite(CS);
 }

-// getSpecializationCost - The heuristic used to determine the code-size
-// impact of creating a specialized version of Callee with argument
-// SpecializedArgNo replaced by a constant.
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
-         SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
-  // Don't specialize functions which can be redefined at link-time to mean
-  // something else.
-  if (Callee->mayBeOverridden())
-    return llvm::InlineCost::getNever();
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+  // We found something we don't understand or can't handle. Mark any SROA-able
+  // values in the operand list as no longer viable.
+  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+    disableSROA(*OI);
+
+  return false;
+}

-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible-to-inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+  for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+       I != E; ++I) {
+    ++NumInstructions;
+    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+      ++NumVectorInstructions;
+
+    // If the instruction simplified to a constant, there is no cost to this
+    // instruction. Visit the instructions using our InstVisitor to account for
+    // all of the per-instruction logic. The visit tree returns true if we
+    // consumed the instruction in any way, and false if the instruction's base
+    // cost should count against inlining.
+    if (Base::visit(I))
+      ++NumInstructionsSimplified;
+    else
+      Cost += InlineConstants::InstrCost;
+
+    // If visiting this instruction detected an uninlinable pattern, abort.
+    if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+      return false;
+
+    if (NumVectorInstructions > NumInstructions/2)
+      VectorBonus = FiftyPercentVectorBonus;
+    else if (NumVectorInstructions > NumInstructions/10)
+      VectorBonus = TenPercentVectorBonus;
+    else
+      VectorBonus = 0;
+
+    // Check if we've passed the threshold so we don't spin in huge basic
+    // blocks that will never inline.
+    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+      return false;
+  }

-  int Cost = 0;
+  return true;
+}

-  // Look at the original size of the callee.  Each instruction counts as 5.
-  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+  if (!TD || !V->getType()->isPointerTy())
+    return 0;

-  // Offset that with the amount of code that can be constant-folded
-  // away with the given arguments replaced by constants.
-  for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
-       ae = SpecializedArgNos.end(); an != ae; ++an)
-    Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<Value *, 4> Visited;
+  Visited.insert(V);
+  do {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+        return 0;
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        break;
+      V = GA->getAliasee();
+    } else {
+      break;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  } while (Visited.insert(V));

-  return llvm::InlineCost::get(Cost);
+  Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
 }

-// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-// higher threshold to determine if the function call should be inlined.
-float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
-  Function *Callee = CS.getCalledFunction();
-
-  // Get information about the callee.
-  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI.Metrics.NumBlocks == 0)
-    CalleeFI.analyzeFunction(Callee, TD);
-
-  float Factor = 1.0f;
-  // Single BB functions are often written to be inlined.
-  if (CalleeFI.Metrics.NumBlocks == 1)
-    Factor += 0.5f;
-
-  // Be more aggressive if the function contains a good chunk (if it mades up
-  // at least 10% of the instructions) of vector instructions.
-  if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
-    Factor += 2.0f;
-  else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
-    Factor += 1.5f;
-  return Factor;
-}
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+  ++NumCallsAnalyzed;
+
+  // Track whether the post-inlining function would have more than one basic
+  // block. A single basic block is often intended for inlining. Balloon the
+  // threshold by 50% until we pass the single-BB phase.
+  bool SingleBB = true;
+  int SingleBBBonus = Threshold / 2;
+  Threshold += SingleBBBonus;
+
+  // Unless we are always-inlining, perform some tweaks to the cost and
+  // threshold based on the direct callsite information.
+  if (!AlwaysInline) {
+    // We want to more aggressively inline vector-dense kernels, so up the
+    // threshold, and we'll lower it if the % of vector instructions gets too
+    // low.
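The bonus schedule this comment describes is set up just below and consulted in analyzeBlock; distilled into a free function for illustration (the helper itself is hypothetical, while the names and thresholds mirror the patch):

    // Illustrative restatement of the VectorBonus selection in analyzeBlock.
    static int selectVectorBonus(unsigned NumVectorInstructions,
                                 unsigned NumInstructions,
                                 int FiftyPercentVectorBonus,
                                 int TenPercentVectorBonus) {
      if (NumVectorInstructions > NumInstructions / 2)
        return FiftyPercentVectorBonus; // vector-dense: full extra threshold
      if (NumVectorInstructions > NumInstructions / 10)
        return TenPercentVectorBonus;   // moderately vector-heavy: half
      return 0;                         // scalar code: no bonus
    }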
+    assert(NumInstructions == 0);
+    assert(NumVectorInstructions == 0);
+    FiftyPercentVectorBonus = Threshold;
+    TenPercentVectorBonus = Threshold / 2;
+
+    // Subtract off one instruction per call argument as those will be free after
+    // inlining.
+    Cost -= CS.arg_size() * InlineConstants::InstrCost;
+
+    // If there is only one call of the function, and it has internal linkage,
+    // the cost of inlining it drops dramatically.
+    if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+      Cost += InlineConstants::LastCallToStaticBonus;
+
+    // If the instruction after the call, or if the normal destination of the
+    // invoke is an unreachable instruction, the function is noreturn. As such,
+    // there is little point in inlining this unless there is literally zero cost.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+      if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+        Threshold = 1;
+    } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+      Threshold = 1;
+
+    // If this function uses the coldcc calling convention, prefer not to inline
+    // it.
+    if (F.getCallingConv() == CallingConv::Cold)
+      Cost += InlineConstants::ColdccPenalty;
+
+    // Check if we're done. This can happen due to bonuses and penalties.
+    if (Cost > Threshold)
+      return false;
+  }

-/// growCachedCostInfo - update the cached cost info for Caller after Callee has
-/// been inlined.
-void
-InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
-  CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+  if (F.empty())
+    return true;

-  // For small functions we prefer to recalculate the cost for better accuracy.
-  if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
-    resetCachedCostInfo(Caller);
-    return;
+  // Track whether we've seen a return instruction. The first return
+  // instruction is free, as at least one will usually disappear during
+  // inlining.
+  bool HasReturn = false;
+
+  // Populate our simplified values by mapping from function arguments to call
+  // arguments with known important simplifications.
+  CallSite::arg_iterator CAI = CS.arg_begin();
+  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+       FAI != FAE; ++FAI, ++CAI) {
+    assert(CAI != CS.arg_end());
+    if (Constant *C = dyn_cast<Constant>(CAI))
+      SimplifiedValues[FAI] = C;
+
+    Value *PtrArg = *CAI;
+    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+      // We can SROA any pointer arguments derived from alloca instructions.
+      if (isa<AllocaInst>(PtrArg)) {
+        SROAArgValues[FAI] = PtrArg;
+        SROAArgCosts[PtrArg] = 0;
+      }
+    }
   }
+  NumConstantArgs = SimplifiedValues.size();
+  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+  NumAllocaArgs = SROAArgValues.size();
+
+  // The worklist of live basic blocks in the callee *after* inlining. We avoid
+  // adding basic blocks of the callee which can be proven to be dead for this
+  // particular call site in order to get more accurate cost estimates. This
+  // requires a somewhat heavyweight iteration pattern: we need to walk the
+  // basic blocks in a breadth-first order as we insert live successors. To
+  // accomplish this while prioritizing small iterations (we exit as soon as
+  // we cross our threshold), we use a small-size optimized SetVector.
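Reduced to its core, the iteration pattern that comment describes looks like the sketch below (std::vector plus std::set stand in for llvm::SetVector; BlockT and the successor callback are placeholders):

    #include <cstddef>
    #include <set>
    #include <vector>

    // Index-based breadth-first walk over a growing, deduplicated worklist.
    template <typename BlockT, typename SuccFn>
    void walkLiveBlocks(BlockT *Entry, SuccFn successorsOf) {
      std::vector<BlockT *> Worklist;
      std::set<BlockT *> Seen;
      Worklist.push_back(Entry);
      Seen.insert(Entry);
      // Re-read size() every iteration: visiting a block may append its live
      // successors to the worklist.
      for (std::size_t Idx = 0; Idx != Worklist.size(); ++Idx)
        for (BlockT *Succ : successorsOf(Worklist[Idx]))
          if (Seen.insert(Succ).second)
            Worklist.push_back(Succ);
    }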
+  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+  BBSetVector BBWorklist;
+  BBWorklist.insert(&F.getEntryBlock());
+  // Note that we *must not* cache the size: this loop grows the worklist.
+  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+    // Bail out the moment we cross the threshold. This means we'll under-count
+    // the cost, but only when undercounting doesn't matter.
+    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+      break;
+
+    BasicBlock *BB = BBWorklist[Idx];
+    if (BB->empty())
+      continue;
+
+    // Handle the terminator cost here where we can track returns and other
+    // function-wide constructs.
+    TerminatorInst *TI = BB->getTerminator();
+
+    // We never want to inline functions that contain an indirectbr. Inlining
+    // one would be incorrect because all the blockaddresses (in static global
+    // initializers for example) would be referring to the original function,
+    // and this indirect jump would jump from the inlined copy of the function
+    // into the original function, which is extremely undefined behavior.
+    // FIXME: This logic isn't really right; we can safely inline functions
+    // with indirectbr's as long as no other function or global references the
+    // blockaddress of a block within the current function. And as a QOI issue,
+    // if someone is using a blockaddress without an indirectbr, and that
+    // reference somehow ends up in another function or global, we probably
+    // don't want to inline this function.
+    if (isa<IndirectBrInst>(TI))
+      return false;
+
+    if (!HasReturn && isa<ReturnInst>(TI))
+      HasReturn = true;
+    else
+      Cost += InlineConstants::InstrCost;
+
+    // Analyze the cost of this block. If we blow through the threshold, this
+    // returns false, and we can bail out.
+    if (!analyzeBlock(BB)) {
+      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+        return false;
+      break;
+    }

-  // For large functions, we can save a lot of computation time by skipping
-  // recalculations.
-  if (CallerMetrics.NumCalls > 0)
-    --CallerMetrics.NumCalls;
+    // Add in the live successors by first checking whether we have a
+    // terminator that may be simplified based on the values simplified by
+    // this call.
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isConditional()) {
+        Value *Cond = BI->getCondition();
+        if (ConstantInt *SimpleCond
+              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+          continue;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      Value *Cond = SI->getCondition();
+      if (ConstantInt *SimpleCond
+            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+        continue;
+      }
+    }

-  if (Callee == 0) return;
+    // If we're unable to select a particular successor, just count all of
+    // them.
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+         ++TIdx)
+      BBWorklist.insert(TI->getSuccessor(TIdx));
+
+    // If we had any successors at this point, then post-inlining is likely to
+    // have them as well. Note that we assume any basic blocks which existed
+    // due to branches or switches which folded above will also fold after
+    // inlining.
+    if (SingleBB && TI->getNumSuccessors() > 1) {
+      // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus; + SingleBB = false; + } + } - CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + Threshold += VectorBonus; - // If we don't have metrics for the callee, don't recalculate them just to - // update an approximation in the caller. Instead, just recalculate the - // caller info from scratch. - if (CalleeMetrics.NumBlocks == 0) { - resetCachedCostInfo(Caller); - return; - } + return AlwaysInline || Cost < Threshold; +} - // Since CalleeMetrics were already calculated, we know that the CallerMetrics - // reference isn't invalidated: both were in the DenseMap. - CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; - - // FIXME: If any of these three are true for the callee, the callee was - // not inlined into the caller, so I think they're redundant here. - CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; - CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; - CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; - - CallerMetrics.NumInsts += CalleeMetrics.NumInsts; - CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; - CallerMetrics.NumCalls += CalleeMetrics.NumCalls; - CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; - CallerMetrics.NumRets += CalleeMetrics.NumRets; - - // analyzeBasicBlock counts each function argument as an inst. - if (CallerMetrics.NumInsts >= Callee->arg_size()) - CallerMetrics.NumInsts -= Callee->arg_size(); - else - CallerMetrics.NumInsts = 0; - - // We are not updating the argument weights. We have already determined that - // Caller is a fairly large function, so we accept the loss of precision. +/// \brief Dump stats about this call's analysis. +void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); +#undef DEBUG_PRINT_STAT } -/// clear - empty the cache of inline costs -void InlineCostAnalyzer::clear() { - CachedFunctionInfo.clear(); +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); +} + +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. + if (!Callee || Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) + return llvm::InlineCost::getNever(); + + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); + + CallAnalyzer CA(TD, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); + + DEBUG(CA.dump()); + + // Check if there was a reason to force inlining or no inlining. 
+ if (!ShouldInline && CA.getCost() < CA.getThreshold()) + return InlineCost::getNever(); + if (ShouldInline && CA.getCost() >= CA.getThreshold()) + return InlineCost::getAlways(); + + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 131cc97..16e7a72 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,13 +18,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/GlobalAlias.h" #include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" @@ -37,23 +41,28 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc, "Number of reassociations"); -static Value *SimplifyAndInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyOrInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); +struct Query { + const TargetData *TD; + const TargetLibraryInfo *TLI; + const DominatorTree *DT; + + Query(const TargetData *td, const TargetLibraryInfo *tli, + const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}; +}; + +static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); /// getFalse - For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. static Constant *getFalse(Type *Ty) { - assert((Ty->isIntegerTy(1) || - (Ty->isVectorTy() && - cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + assert(Ty->getScalarType()->isIntegerTy(1) && "Expected i1 type or a vector of i1!"); return Constant::getNullValue(Ty); } @@ -61,13 +70,25 @@ static Constant *getFalse(Type *Ty) { /// getTrue - For a boolean type, or a vector of boolean type, return true, or /// a vector with every element true, as appropriate for the type. static Constant *getTrue(Type *Ty) { - assert((Ty->isIntegerTy(1) || - (Ty->isVectorTy() && - cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + assert(Ty->getScalarType()->isIntegerTy(1) && "Expected i1 type or a vector of i1!"); return Constant::getAllOnesValue(Ty); } +/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? 
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + CmpInst *Cmp = dyn_cast<CmpInst>(V); + if (!Cmp) + return false; + CmpInst::Predicate CPred = Cmp->getPredicate(); + Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); + if (CPred == Pred && CLHS == LHS && CRHS == RHS) + return true; + return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && + CRHS == LHS; +} + /// ValueDominatesPHI - Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); @@ -75,9 +96,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // Arguments and constants dominate all instructions. return true; + // If we are processing instructions (and/or basic blocks) that have not been + // fully added to a function, the parent nodes may still be null. Simply + // return the conservative answer in these cases. + if (!I->getParent() || !P->getParent() || !I->getParent()->getParent()) + return false; + // If we have a DominatorTree then do a precise test. - if (DT) + if (DT) { + if (!DT->isReachableFromEntry(P->getParent())) + return true; + if (!DT->isReachableFromEntry(I->getParent())) + return false; return DT->dominates(I, P); + } // Otherwise, if the instruction is in the entry block, and is not an invoke, // then it obviously dominates all phi nodes. @@ -94,8 +126,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExpand, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { + unsigned OpcToExpand, const Query &Q, + unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -107,8 +139,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op C) op' (B op C)". Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) @@ -117,8 +149,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return LHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -131,8 +162,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op B) op' (A op C)". Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); // Do "A op B" and "A op C" both simplify? 
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) @@ -141,8 +172,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return RHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -157,8 +187,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". /// Returns the simplified value, or null if no simplification was performed. static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExtract, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { + unsigned OpcToExtract, const Query &Q, + unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -182,7 +212,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *DD = A == C ? D : C; // Form "A op' (B op DD)" if it simplifies completely. // Does "B op DD" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) { // It does! Return "A op' V" if it simplifies or is already available. // If V equals B then "A op' V" is just the LHS. If V equals DD then // "A op' V" is just the RHS. @@ -191,7 +221,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == B ? LHS : RHS; } // Otherwise return "A op' V" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -205,7 +235,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *CC = B == D ? C : D; // Form "(A op CC) op' B" if it simplifies completely.. // Does "A op CC" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) { // It does! Return "V op' B" if it simplifies or is already available. // If V equals A then "V op' B" is just the LHS. If V equals CC then // "V op' B" is just the RHS. @@ -214,7 +244,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == A ? LHS : RHS; } // Otherwise return "V op' B" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -227,9 +257,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// SimplifyAssociativeBinOp - Generic simplifications for associative binary /// operations. Returns the simpler value, or null if none was found. 
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, - const TargetData *TD, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -247,12 +275,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // It does! Return "A op V" if it simplifies or is already available. // If V equals B then "A op V" is just the LHS. if (V == B) return LHS; // Otherwise return "A op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -266,12 +294,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { // It does! Return "V op C" if it simplifies or is already available. // If V equals B then "V op C" is just the RHS. if (V == B) return RHS; // Otherwise return "V op C" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -289,12 +317,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "V op B" if it simplifies or is already available. // If V equals A then "V op B" is just the LHS. if (V == A) return LHS; // Otherwise return "V op B" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -308,12 +336,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "B op V" if it simplifies or is already available. // If V equals C then "B op V" is just the RHS. if (V == C) return RHS; // Otherwise return "B op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -328,9 +356,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, /// evaluating it on both branches of the select results in the same value. /// Returns the common value if so, otherwise returns null. static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
if (!MaxRecurse--) return 0; @@ -347,11 +373,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, Value *TV; Value *FV; if (SI == LHS) { - TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); } else { - TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); } // If they simplified to the same value, then return the common value. @@ -402,8 +428,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, /// result in the same value. Returns the common value if so, otherwise returns /// null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const TargetData *TD, - const DominatorTree *DT, + Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -416,40 +441,67 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast<SelectInst>(LHS); + Value *Cond = SI->getCondition(); + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! Does "cmp FV, RHS" simplify? - if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! If they simplified to the same value, then use it as the - // result of the original comparison. - if (TCmp == FCmp) - return TCmp; - Value *Cond = SI->getCondition(); - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". - if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, DT, MaxRecurse)) - return V; - } + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); + if (TCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'true'. + TCmp = getTrue(Cond->getType()); + } else if (!TCmp) { + // It didn't simplify. However if "cmp TV, RHS" is equal to the select + // condition then we can replace it with 'true'. Otherwise give up. + if (!isSameCompare(Cond, Pred, TV, RHS)) + return 0; + TCmp = getTrue(Cond->getType()); + } + + // Does "cmp FV, RHS" simplify? 
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); + if (FCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'false'. + FCmp = getFalse(Cond->getType()); + } else if (!FCmp) { + // It didn't simplify. However if "cmp FV, RHS" is equal to the select + // condition then we can replace it with 'false'. Otherwise give up. + if (!isSameCompare(Cond, Pred, FV, RHS)) + return 0; + FCmp = getFalse(Cond->getType()); } + // If both sides simplified to the same value, then use it as the result of + // the original comparison. + if (TCmp == FCmp) + return TCmp; + + // The remaining cases only make sense if the select condition has the same + // type as the result of the comparison, so bail out if this is not so. + if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) + return 0; + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + Q, MaxRecurse)) + return V; + return 0; } @@ -458,8 +510,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// it on the incoming phi values yields the same result for every value. If so /// returns the common value, otherwise returns null. static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -468,13 +519,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, if (isa<PHINode>(LHS)) { PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; } else { assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); PI = cast<PHINode>(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(LHS, PI, DT)) + if (!ValueDominatesPHI(LHS, PI, Q.DT)) return 0; } @@ -485,8 +536,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = PI == LHS ? - SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) : - SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse); + SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : + SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. 
if (!V || (CommonValue && V != CommonValue)) @@ -502,8 +553,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, /// incoming phi values yields the same result every time. If so returns the /// common result, otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -517,7 +567,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, PHINode *PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; // Evaluate the BinOp on the incoming phi values. @@ -526,7 +576,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse); + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -540,13 +590,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; - return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), - Ops, TD); + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, + Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -576,17 +625,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// i1 add -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Add over selects and phi nodes is pointless, so don't bother. 
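// A minimal standalone C++ sketch (not from the patch) of why
// SimplifyAddInst above can hand an i1 add to SimplifyXorInst: addition
// modulo 2 and xor are the same function. The analogous i1 sub -> xor and
// i1 mul -> and rewrites appear further down and are checked here too.
#include <cassert>
int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert(((A + B) & 1) == (A ^ B)); // i1 add == i1 xor
      assert(((A - B) & 1) == (A ^ B)); // i1 sub == i1 xor
      assert((A * B) == (A & B));       // i1 mul == i1 and
    }
  return 0;
}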
@@ -602,20 +651,116 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// \brief Accumulate the constant integer offset a GEP represents. +/// +/// Given a getelementptr instruction/constantexpr, accumulate the constant +/// offset from the base pointer into the provided APInt 'Offset'. Returns true +/// if the GEP has all-constant indices. Returns false if any non-constant +/// index is encountered leaving the 'Offset' in an undefined state. The +/// 'Offset' APInt must be the bitwidth of the target's pointer size. +static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP, + APInt &Offset) { + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(*I); + if (!OpC) return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD.getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +static Constant *stripAndComputeConstantOffsets(const TargetData &TD, + Value *&V) { + if (!V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet<Value *, 4> Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset)) + break; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD.getIntPtrType(V->getContext()); + return ConstantInt::get(IntPtrTy, Offset); +} + +/// \brief Compute the constant difference between two pointer values. +/// If the difference is not a constant, returns zero. 
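// A minimal standalone C++ sketch (not from the patch) of the arithmetic
// these helpers perform, using plain layout facts: struct indices add a
// field offset, array indices add index * alloc-size, and two pointers that
// strip down to the same base subtract to the difference of their offsets.
// The struct and indices are illustrative only.
#include <cassert>
#include <cstddef>

struct Pair { int First; long Second; };

int main() {
  Pair Buf[8];
  char *Base = reinterpret_cast<char *>(Buf);

  // Offset of "gep %Buf, 3, 1": 3 * alloc-size of Pair + offset of field 1.
  std::size_t LHSOffset = 3 * sizeof(Pair) + offsetof(Pair, Second);
  // Offset of "gep %Buf, 1, 0".
  std::size_t RHSOffset = 1 * sizeof(Pair) + offsetof(Pair, First);

  assert(reinterpret_cast<char *>(&Buf[3].Second) == Base + LHSOffset);

  // (Base + LHSOffset) - (Base + RHSOffset) == LHSOffset - RHSOffset.
  assert((Base + LHSOffset) - (Base + RHSOffset) ==
         std::ptrdiff_t(LHSOffset - RHSOffset));
  return 0;
}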
+static Constant *computePointerDifference(const TargetData &TD, + Value *LHS, Value *RHS) { + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + if (!LHSOffset) + return 0; + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + if (!RHSOffset) + return 0; + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. + if (LHS != RHS) + return 0; + + // Otherwise, the difference of LHS - RHS can be computed as: + // LHS - RHS + // = (LHSOffset + Base) - (RHSOffset + Base) + // = LHSOffset - RHSOffset + return ConstantExpr::getSub(LHSOffset, RHSOffset); } /// SimplifySubInst - Given operands for a Sub, see if we can /// fold the result. If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // X - undef -> undef @@ -643,19 +788,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *Y = 0, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) // It does! Now see if "X + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "Y + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -667,19 +810,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, X = Op0; if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) // See if "V === X - Y" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "V - Z" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "V - Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! 
++NumReassoc; return W; @@ -691,23 +832,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Z = Op0; if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) // See if "V === Z - X" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) // It does! Now see if "V + Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } + // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. + if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) && + match(Op1, m_Trunc(m_Value(Y)))) + if (X->getType() == Y->getType()) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + // It does! Now see if "trunc V" simplifies. + if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1)) + // It does, return the simplified "trunc V". + return W; + + // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). + if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + match(Op1, m_PtrToInt(m_Value(Y)))) + if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); + // Mul distributes over Sub. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // i1 sub -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Threading Sub over selects and phi nodes is pointless, so don't bother. @@ -723,19 +880,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -755,40 +914,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, return Op0; // (X / Y) * Y -> X if the division is exact. - Value *X = 0, *Y = 0; - if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y - (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y) - BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? 
Op0 : Op1); - if (Div->isExact()) - return X; - } + Value *X = 0; + if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y + match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) + return X; // i1 mul -> and. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; @@ -796,19 +952,19 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can /// fold the result. If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -842,7 +998,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, Value *X = 0, *Y = 0; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 - BinaryOperator *Mul = cast<BinaryOperator>(Op0); + OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); // If the Mul knows it does not overflow, then we are good to go. if ((isSigned && Mul->hasNoSignedWrap()) || (!isSigned && Mul->hasNoUnsignedWrap())) @@ -861,13 +1017,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
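// A minimal standalone C++ sketch (not from the patch) of the two division
// round-trips above. "(X / Y) * Y -> X" needs the exact flag (no remainder),
// and "(X * Y) / Y -> X" needs the no-wrap flags that the
// OverflowingBinaryOperator cast checks; both directions fail otherwise.
#include <cassert>
#include <cstdint>

int main() {
  // Exact division: 84 is a multiple of 7, so the fold is sound.
  assert((84 / 7) * 7 == 84);
  // Inexact division drops the remainder, so the fold would be wrong.
  assert((85 / 7) * 7 != 85);

  // No-overflow multiply in 8 bits: 6 * 7 = 42 fits, so X is recovered.
  uint8_t X = 6, Y = 7;
  assert(uint8_t(uint8_t(X * Y) / Y) == X);
  // 100 * 3 wraps to 44 in 8 bits, and 44 / 3 = 14, not 100.
  uint8_t A = 100, B = 3;
  assert(uint8_t(uint8_t(A * B) / B) != A);
  return 0;
}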
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -875,36 +1031,38 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySDivInst - Given operands for an SDiv, see if we can /// fold the result. If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, - const DominatorTree *, unsigned) { +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned) { // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -917,19 +1075,19 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyRem - Given operands for an SRem or URem, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -964,13 +1122,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -978,36 +1136,38 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. -static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyURemInst - Given operands for a URem, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, - const DominatorTree *, unsigned) { +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, + unsigned) { // undef % X -> undef (the undef could be a snan). 
if (match(Op0, m_Undef())) return Op0; @@ -1020,19 +1180,19 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -1057,13 +1217,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1072,9 +1232,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, /// SimplifyShlInst - Given operands for an Shl, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; // undef << X -> 0 @@ -1083,23 +1242,23 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // (X >> A) << A -> X Value *X; - if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) && - cast<PossiblyExactOperator>(Op0)->isExact()) + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; return 0; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyLShrInst - Given operands for an LShr, see if we can /// fold the result. If not, this returns null. 
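// A minimal standalone C++ sketch (not from the patch) of the
// "(X >> A) << A -> X" fold in SimplifyShlInst above: it is only sound when
// the right shift is exact, i.e. shifts out nothing but zero bits, which is
// the precondition the m_Exact matcher encodes.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned A = 16;
  uint32_t X = 0xABCD0000u;     // low 16 bits clear: the lshr is exact
  assert(((X >> A) << A) == X); // round trip preserves X
  uint32_t Y = 0xABCD0001u;     // a set low bit would be shifted out
  assert(((Y >> A) << A) != Y); // the fold would miscompile this
  return 0;
}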
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) return V; // undef >>l X -> 0 @@ -1116,16 +1275,18 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAShrInst - Given operands for an AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) return V; // all ones >>a X -> all ones @@ -1146,19 +1307,22 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1197,37 +1361,46 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, (A == Op0 || B == Op0)) return Op0; + // A & (-A) = A if A is a power of two or zero. + if (match(Op0, m_Neg(m_Specific(Op1))) || + match(Op1, m_Neg(m_Specific(Op0)))) { + if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true)) + return Op0; + if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true)) + return Op1; + } + // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. 
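// A minimal standalone C++ sketch (not from the patch) of the new
// "A & (-A) = A" fold in SimplifyAndInst above. In two's complement,
// A & -A isolates the lowest set bit, so it equals A exactly when A is a
// power of two or zero -- the isPowerOfTwo(..., /*OrZero*/true) condition.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t A = 0; A < 4096; ++A) {
    bool PowerOfTwoOrZero = (A & (A - 1)) == 0;
    bool FoldHolds = (A & (0u - A)) == A;
    assert(FoldHolds == PowerOfTwoOrZero);
  }
  return 0;
}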
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; @@ -1235,19 +1408,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1297,51 +1471,51 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q, + MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. 
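// A minimal standalone C++ sketch (not from the patch) of the distributive
// laws that ExpandBinOp and FactorizeBinOp rely on, checked bitwise over a
// range of values: And distributes over Or and Xor, Or distributes over And.
#include <cassert>
#include <cstdint>

int main() {
  for (uint8_t A = 0; A < 8; ++A)
    for (uint8_t B = 0; B < 8; ++B)
      for (uint8_t C = 0; C < 8; ++C) {
        assert((A & (B | C)) == ((A & B) | (A & C)));
        assert((A & (B ^ C)) == ((A & B) ^ (A & C)));
        assert((A | (B & C)) == ((A | B) & (A | C)));
      }
  return 0;
}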
if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1366,13 +1540,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Xor over selects and phi nodes is pointless, so don't bother. @@ -1388,8 +1562,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1416,17 +1591,56 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } +static Constant *computePointerICmp(const TargetData &TD, + CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + // We can only fold certain predicates on pointer comparisons. + switch (Pred) { + default: + return 0; + + // Equality comparisons are easy to fold. + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + break; + + // We can only handle unsigned relational comparisons because 'inbounds' on + // a GEP only protects against unsigned wrapping. + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + // However, we have to switch them to their signed variants to handle + // negative indices from the base pointer. + Pred = ICmpInst::getSignedPredicate(Pred); + break; + } + + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + if (!LHSOffset) + return 0; + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + if (!RHSOffset) + return 0; + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. + if (LHS != RHS) + return 0; + + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); +} + /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result.
If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -1443,8 +1657,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); // Special case logic when the operands have i1 type. - if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() && - cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) { + if (OpTy->getScalarType()->isIntegerTy(1)) { switch (Pred) { default: break; case ICmpInst::ICMP_EQ: @@ -1480,63 +1693,101 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp <alloca*>, <global/alloca*/null> - Different stack variables have - // different addresses, and what's more the address of a stack variable is - // never null or equal to the address of a global. Note that generalizing - // to the case where LHS is a global variable address or null is pointless, - // since if both LHS and RHS are constants then we already constant folded - // the compare, and if only one of them is then we moved it to RHS already. - if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || - isa<ConstantPointerNull>(RHS))) - // We already know that LHS != RHS. - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + // icmp <object*>, <object*/null> - Different identified objects have + // different addresses (unless null), and what's more the address of an + // identified local is never equal to another argument (again, barring null). + // Note that generalizing to the case where LHS is a global variable address + // or null is pointless, since if both LHS and RHS are constants then we + // already constant folded the compare, and if only one of them is then we + // moved it to RHS already. + Value *LHSPtr = LHS->stripPointerCasts(); + Value *RHSPtr = RHS->stripPointerCasts(); + if (LHSPtr == RHSPtr) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + // Be more aggressive about stripping pointer adjustments when checking a + // comparison of an alloca address to another object. We can rip off all + // inbounds GEP operations, even if they are variable. + LHSPtr = LHSPtr->stripInBoundsOffsets(); + if (llvm::isIdentifiedObject(LHSPtr)) { + RHSPtr = RHSPtr->stripInBoundsOffsets(); + if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) { + // If both sides are different identified objects, they aren't equal + // unless they're null. + if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + + // A local identified object (alloca or noalias call) can't equal any + // incoming argument, unless they're both null. + if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + } + + // Assume that the constant null is on the right. 
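// A minimal standalone C++ sketch (not from the patch) of the
// identified-object reasoning above: two distinct stack objects (allocas)
// never share an address and are never null, so those equalities fold to
// constants even through pointer casts and copies.
#include <cassert>

int main() {
  int A, B;              // two distinct allocas
  assert(&A != &B);      // icmp eq %A, %B -> false
  assert(&A != nullptr); // icmp eq %A, null -> false; ne -> true
  int *P = &B;
  assert(&A != P);       // still false through a stripped pointer copy
  return 0;
}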
+ if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } + } else if (isa<Argument>(LHSPtr)) { + RHSPtr = RHSPtr->stripInBoundsOffsets(); + // An alloca can't be equal to an argument. + if (isa<AllocaInst>(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } + } // If we are comparing with zero then try hard since this is a common case. if (match(RHS, m_Zero())) { bool LHSKnownNonNegative, LHSKnownNegative; switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: return getFalse(ITy); case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; } @@ -1564,6 +1815,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // 'srem x, CI2' produces (-|CI2|, |CI2|). Upper = CI2->getValue().abs(); Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { + // 'udiv CI2, x' produces [0, CI2]. + Upper = CI2->getValue() + 1; } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. APInt NegOne = APInt::getAllOnesValue(Width); @@ -1616,19 +1870,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. - if (MaxRecurse && TD && isa<PtrToIntInst>(LI) && - TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && + Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. 
if (Value *V = SimplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } } @@ -1640,7 +1894,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), TD, DT, + SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } @@ -1656,15 +1910,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // also a case of comparing two zero-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, TD, DT, MaxRecurse-1)) + SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit // there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); // LHS <u RHS. case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: @@ -1701,7 +1954,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended @@ -1715,16 +1968,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT, - MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are all equal, while RHS has varying // bits there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_EQ: return ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_NE: @@ -1751,7 +2002,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; case ICmpInst::ICMP_ULT: @@ -1760,7 +2011,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; } @@ -1794,14 +2045,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? 
B : A, Constant::getNullValue(RHS->getType()), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), - C == LHS ? D : C, TD, DT, MaxRecurse-1)) + C == LHS ? D : C, Q, MaxRecurse-1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. @@ -1810,7 +2061,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y = (A == C || A == D) ? B : A; Value *Z = (C == A || C == B) ? D : C; - if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) return V; } } @@ -1822,7 +2073,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1832,7 +2083,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1849,7 +2100,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1859,7 +2110,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1870,6 +2121,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // x udiv y <=u x. + if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X /u Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { @@ -1884,7 +2144,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; case Instruction::Shl: { @@ -1895,7 +2155,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; } @@ -1949,7 +2209,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. 
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -1963,7 +2223,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2019,7 +2279,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2033,7 +2293,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2090,37 +2350,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); } + // Simplify comparisons of related pointers using a powerful, recursive + // GEP-walk when we have target data available. + if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + return C; + + if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { + if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) { + if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && + GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() && + (ICmpInst::isEquality(Pred) || + (GLHS->isInBounds() && GRHS->isInBounds() && + Pred == ICmpInst::getSignedPredicate(Pred)))) { + // The bases are equal and the indices are constant. Build a constant + // expression GEP with the same indices and a null base pointer to see + // what constant folding can make out of it. + Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); + SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); + Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + + SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); + Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); + } + } + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result.
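// A minimal standalone C++ sketch (not from the patch) of three of the
// integer-compare folds above, phrased as modular-arithmetic facts: a
// zext'd i8 can never reach 300; "X + Y == X" holds exactly when Y == 0
// because addition mod 2^N is invertible; and an unsigned quotient never
// exceeds its dividend ("x udiv y <=u x").
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint32_t Ext = uint32_t(uint8_t(V)); // zext i8 -> i32
    assert(Ext != 300u);                 // icmp eq (zext X), 300 -> false
    assert(Ext < 300u);                  // icmp ult (zext X), 300 -> true
  }

  uint8_t X = 200;
  for (unsigned Y = 0; Y < 256; ++Y)
    assert((uint8_t(X + Y) == X) == (Y == 0)); // icmp eq (X+Y), X -> Y == 0

  for (uint32_t N = 1; N < 64; ++N)
    for (uint32_t D = 1; D < 64; ++D)
      assert(N / D <= N); // icmp ugt (X /u Y), X -> false
  return 0;
}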
if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -2188,27 +2477,31 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. -Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, - const TargetData *TD, const DominatorTree *) { +static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, + Value *FalseVal, const Query &Q, + unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) @@ -2231,12 +2524,22 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, return 0; } +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. 
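// A minimal standalone C++ sketch (not from the patch) of the GEP-vs-GEP
// compare fold above: with a shared base and all-constant indices, the
// compare is decided purely by the constant offsets, which is what
// rebuilding both GEPs on a null base lets the constant folder compute.
// The struct is illustrative only.
#include <cassert>
#include <cstddef>

struct S { int A; int B; int C; };

int main() {
  S Obj;
  // Same base &Obj, constant field offsets: the address compare reduces to
  // a compare of the offsets.
  assert((&Obj.B < &Obj.C) == (offsetof(S, B) < offsetof(S, C)));
  assert(&Obj.A != &Obj.C); // different constant offsets -> never equal
  return 0;
}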
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, - const TargetData *TD, const DominatorTree *) { +static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. - PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType()); + // The GEP pointer operand is not a pointer, it's a vector of pointers. + if (!PtrTy) + return 0; // getelementptr P -> P. if (Ops.size() == 1) @@ -2255,9 +2558,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, if (C->isZero()) return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. - if (TD) { + if (Q.TD) { Type *Ty = PtrTy->getElementType(); - if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) + if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0) return Ops[0]; } } @@ -2270,12 +2573,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); } +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); +} + /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we /// can fold the result. If not, this returns null. -Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef<unsigned> Idxs, - const TargetData *, - const DominatorTree *) { +static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, const Query &Q, + unsigned) { if (Constant *CAgg = dyn_cast<Constant>(Agg)) if (Constant *CVal = dyn_cast<Constant>(Val)) return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); @@ -2300,8 +2608,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, return 0; } +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { +static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = 0; @@ -2329,67 +2646,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; return CommonValue; } +static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { + if (Constant *C = dyn_cast<Constant>(Op)) + return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI); + + return 0; +} + +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); +} //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); - case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); + Q, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); - case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse); + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); default: if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD, + Q.TLI); } // If the operation is associative, try some generic simplifications. if (Instruction::isAssociative(Opcode)) - if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT, - MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - // If the operation is with the result of a select instruction, check whether + // If the operation is with the result of a select instruction check whether // operating on either branch of the select always yields the same value. 
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; return 0; @@ -2397,119 +2724,136 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); + return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); +} + +static Value *SimplifyCallInst(CallInst *CI, const Query &) { + // call undef -> undef + if (isa<UndefValue>(CI->getCalledValue())) + return UndefValue::get(CI->getType()); + + return 0; } /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
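The new SimplifyCallInst above handles exactly one degenerate case: a call through an undef callee is undefined behavior, so its result can be folded to undef. A toy rendering of that rule (UndefValue modeled as a flag; purely illustrative):

    #include <cstdio>

    struct Value    { bool IsUndef; };
    struct CallInst { Value *Callee; };

    // Same contract as the patch's SimplifyCallInst: return a replacement
    // value, or 0 when no simplification applies.
    static Value *simplifyCall(CallInst *CI, Value *UndefOfResultType) {
      if (CI->Callee->IsUndef)    // calling undef can never be well-defined,
        return UndefOfResultType; // so any result is as good as undef
      return 0;
    }

    int main() {
      Value UndefFn = { true }, RealFn = { false }, UndefResult = { true };
      CallInst A = { &UndefFn }, B = { &RealFn };
      std::printf("%d %d\n", simplifyCall(&A, &UndefResult) != 0,
                  simplifyCall(&B, &UndefResult) != 0); // 1 0
      return 0;
    }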
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, TD); + Result = ConstantFoldInstruction(I, TD, TLI); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), TD, DT); + I->getOperand(2), TD, TLI, DT); 
break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, TD, DT); + Result = SimplifyGEPInst(Ops, TD, TLI, DT); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast<InsertValueInst>(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), TD, DT); + IV->getIndices(), TD, TLI, DT); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast<PHINode>(I), DT); + Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT)); + break; + case Instruction::Call: + Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT)); + break; + case Instruction::Trunc: + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; } @@ -2519,57 +2863,84 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, return Result == I ? UndefValue::get(I->getType()) : Result; } -/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then -/// delete the From instruction. In addition to a basic RAUW, this does a -/// recursive simplification of the newly formed instructions. This catches -/// things where one simplification exposes other opportunities. This only -/// simplifies and deletes scalar operations, it does not change the CFG. +/// \brief Implementation of recursive simplification through an instructions +/// uses. /// -void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, - const TargetData *TD, - const DominatorTree *DT) { - assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); - - // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that - // we can know if it gets deleted out from under us or replaced in a - // recursive simplification. - WeakVH FromHandle(From); - WeakVH ToHandle(To); - - while (!From->use_empty()) { - // Update the instruction to use the new value. - Use &TheUse = From->use_begin().getUse(); - Instruction *User = cast<Instruction>(TheUse.getUser()); - TheUse = To; - - // Check to see if the instruction can be folded due to the operand - // replacement. For example changing (or X, Y) into (or X, -1) can replace - // the 'or' with -1. - Value *SimplifiedVal; - { - // Sanity check to make sure 'User' doesn't dangle across - // SimplifyInstruction. - AssertingVH<> UserHandle(User); - - SimplifiedVal = SimplifyInstruction(User, TD, DT); - if (SimplifiedVal == 0) continue; - } +/// This is the common implementation of the recursive simplification routines. +/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to +/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of +/// instructions to process and attempt to simplify it using +/// InstructionSimplify. +/// +/// This routine returns 'true' only when *it* simplifies something. The passed +/// in simplified value does not count toward this. +static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + bool Simplified = false; + SmallSetVector<Instruction *, 8> Worklist; + + // If we have an explicit value to collapse to, do that round of the + // simplification loop by hand initially. + if (SimpleV) { + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + if (*UI != I) + Worklist.insert(cast<Instruction>(*UI)); + + // Replace the instruction with its simplified value. 
+ I->replaceAllUsesWith(SimpleV); + + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); + } else { + Worklist.insert(I); + } - // Recursively simplify this user to the new value. - ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT); - From = dyn_cast_or_null<Instruction>((Value*)FromHandle); - To = ToHandle; + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + I = Worklist[Idx]; + + // See if this instruction simplifies. + SimpleV = SimplifyInstruction(I, TD, TLI, DT); + if (!SimpleV) + continue; - assert(ToHandle && "To value deleted by recursive simplification?"); + Simplified = true; - // If the recursive simplification ended up revisiting and deleting - // 'From' then we're done. - if (From == 0) - return; + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + Worklist.insert(cast<Instruction>(*UI)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); } + return Simplified; +} - // If 'From' has value handles referring to it, do a real RAUW to update them. - From->replaceAllUsesWith(To); +bool llvm::recursivelySimplifyInstruction(Instruction *I, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); +} - From->eraseFromParent(); +bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); + assert(SimpleV && "Must provide a simplified value."); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT); } diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt new file mode 100644 index 0000000..a8a8079 --- /dev/null +++ b/lib/Analysis/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/Analysis/LLVMBuild.txt -----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
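Stepping back to replaceAndRecursivelySimplifyImpl above, just before this new LLVMBuild.txt file: it trades the old recursive ReplaceAndSimplifyAllUses for an index-driven worklist, so each RAUW queues the affected users and newly exposed folds are picked up without unbounded recursion. A self-contained sketch of that traversal discipline over a toy def-use graph (the real code uses SmallSetVector and SimplifyInstruction; everything here is illustrative):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Inst {
      int Id;
      std::vector<Inst *> Users;
      bool Dead;
      explicit Inst(int I) : Id(I), Dead(false) {}
    };

    // Stand-in for SimplifyInstruction: pretend even ids fold away.
    static bool trySimplify(Inst *I) { return I->Id % 2 == 0; }

    static bool recursivelySimplify(Inst *Root) {
      bool Simplified = false;
      std::vector<Inst *> Worklist;
      std::set<Inst *> Seen; // SmallSetVector in the patch: dedup + order
      Worklist.push_back(Root);
      Seen.insert(Root);

      // Test the size on each iteration: the worklist grows as we walk it.
      for (size_t Idx = 0; Idx != Worklist.size(); ++Idx) {
        Inst *I = Worklist[Idx];
        if (I->Dead || !trySimplify(I))
          continue;
        Simplified = true;
        // Stash the users before erasing I, mirroring the stash-then-RAUW
        // order in the patch.
        for (size_t u = 0, e = I->Users.size(); u != e; ++u)
          if (Seen.insert(I->Users[u]).second)
            Worklist.push_back(I->Users[u]);
        I->Dead = true; // replaceAllUsesWith + eraseFromParent, in spirit
      }
      return Simplified;
    }

    int main() {
      Inst A(2), B(4), C(3);
      A.Users.push_back(&B);
      B.Users.push_back(&C);
      std::printf("changed=%d B.Dead=%d C.Dead=%d\n",
                  recursivelySimplify(&A), B.Dead, C.Dead); // 1 1 0
      return 0;
    }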
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = IPA
+
+[component_0]
+type = Library
+name = Analysis
+parent = Libraries
+required_libraries = Core Support Target
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f80595c..5ca2746 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -20,20 +20,25 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include <map>
 #include <stack>
 using namespace llvm;
+using namespace PatternMatch;
 
 char LazyValueInfo::ID = 0;
-INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+                "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
                 "Lazy Value Information Analysis", false, true)
 
 namespace llvm {
@@ -61,10 +66,10 @@ class LVILatticeVal {
     constant,
     /// notconstant - This Value is known to not have the specified value.
     notconstant,
-
+
    /// constantrange - The Value falls within this range.
     constantrange,
-
+
    /// overdefined - This value is not known to be constant, and we know that
    /// it has a value.
    overdefined
@@ -207,7 +212,7 @@ public:
 
      // Unless we can prove that the two Constants are different, we must
      // move to overdefined.
-      // FIXME: use TargetData for smarter constant folding.
+      // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
      if (ConstantInt *Res = dyn_cast<ConstantInt>(
              ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
                                              getConstant(),
@@ -233,7 +238,7 @@ public:
 
      // Unless we can prove that the two Constants are different, we must
      // move to overdefined.
-      // FIXME: use TargetData for smarter constant folding.
+      // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getNotConstant(), @@ -305,50 +310,6 @@ namespace { }; } -namespace llvm { - template<> - struct DenseMapInfo<LVIValueHandle> { - typedef DenseMapInfo<Value*> PointerInfo; - static inline LVIValueHandle getEmptyKey() { - return LVIValueHandle(PointerInfo::getEmptyKey(), - static_cast<LazyValueInfoCache*>(0)); - } - static inline LVIValueHandle getTombstoneKey() { - return LVIValueHandle(PointerInfo::getTombstoneKey(), - static_cast<LazyValueInfoCache*>(0)); - } - static unsigned getHashValue(const LVIValueHandle &Val) { - return PointerInfo::getHashValue(Val); - } - static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) { - return LHS == RHS; - } - }; - - template<> - struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > { - typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy; - typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo; - typedef DenseMapInfo<Value*> BPointerInfo; - static inline PairTy getEmptyKey() { - return std::make_pair(APointerInfo::getEmptyKey(), - BPointerInfo::getEmptyKey()); - } - static inline PairTy getTombstoneKey() { - return std::make_pair(APointerInfo::getTombstoneKey(), - BPointerInfo::getTombstoneKey()); - } - static unsigned getHashValue( const PairTy &Val) { - return APointerInfo::getHashValue(Val.first) ^ - BPointerInfo::getHashValue(Val.second); - } - static bool isEqual(const PairTy &LHS, const PairTy &RHS) { - return APointerInfo::isEqual(LHS.first, RHS.first) && - BPointerInfo::isEqual(LHS.second, RHS.second); - } - }; -} - namespace { /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. @@ -360,14 +321,18 @@ namespace { /// ValueCache - This is all of the cached information for all values, /// mapped from Value* to key information. - DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache; + std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; /// OverDefinedCache - This tracks, on a per-block basis, the set of /// values that are over-defined at the end of that block. This is required /// for cache updating. typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; DenseSet<OverDefinedPairTy> OverDefinedCache; - + + /// SeenBlocks - Keep track of all blocks that we have ever seen, so we + /// don't spend time removing unused blocks from our caches. + DenseSet<AssertingVH<BasicBlock> > SeenBlocks; + /// BlockValueStack - This stack holds the state of the value solver /// during a query. It basically emulates the callstack of the naive /// recursive value lookup process. @@ -438,6 +403,7 @@ namespace { /// clear - Empty the cache. void clear() { + SeenBlocks.clear(); ValueCache.clear(); OverDefinedCache.clear(); } @@ -466,6 +432,12 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Shortcut if we have never seen this block. 
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); + if (I == SeenBlocks.end()) + return; + SeenBlocks.erase(I); + SmallVector<OverDefinedPairTy, 4> ToErase; for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { @@ -477,7 +449,7 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { E = ToErase.end(); I != E; ++I) OverDefinedCache.erase(*I); - for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator + for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) I->second.erase(BB); } @@ -505,6 +477,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { if (Constant *VC = dyn_cast<Constant>(Val)) return LVILatticeVal::get(VC); + SeenBlocks.insert(BB); return lookup(Val)[BB]; } @@ -513,6 +486,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { return true; ValueCacheEntryTy &Cache = lookup(Val); + SeenBlocks.insert(BB); LVILatticeVal &BBLV = Cache[BB]; // OverDefinedCacheUpdater is a helper object that will update @@ -823,9 +797,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); - if (ICI && ICI->getOperand(0) == Val && - isa<Constant>(ICI->getOperand(1))) { - if (ICI->isEquality()) { + if (ICI && isa<Constant>(ICI->getOperand(1))) { + if (ICI->isEquality() && ICI->getOperand(0) == Val) { // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) @@ -835,12 +808,23 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, return true; } - if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) { + // Recognize the range checking idiom that InstCombine produces. + // (X-C1) u< C2 --> [C1, C1+C2) + ConstantInt *NegOffset = 0; + if (ICI->getPredicate() == ICmpInst::ICMP_ULT) + match(ICI->getOperand(0), m_Add(m_Specific(Val), + m_ConstantInt(NegOffset))); + + ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1)); + if (CI && (ICI->getOperand(0) == Val || NegOffset)) { // Calculate the range of values that would satisfy the comparison. ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); ConstantRange TrueValues = ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + if (NegOffset) // Apply the offset from above. + TrueValues = TrueValues.subtract(NegOffset->getValue()); + // If we're interested in the false dest, invert the condition. if (!isTrueDest) TrueValues = TrueValues.inverse(); @@ -882,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // BBFrom to BBTo. unsigned NumEdges = 0; ConstantInt *EdgeVal = 0; - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { - if (SI->getSuccessor(i) != BBTo) continue; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + if (i.getCaseSuccessor() != BBTo) continue; if (NumEdges++) break; - EdgeVal = SI->getCaseValue(i); + EdgeVal = i.getCaseValue(); } assert(EdgeVal && "Missing successor?"); if (NumEdges == 1) { @@ -1007,12 +992,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) { bool LazyValueInfo::runOnFunction(Function &F) { if (PImpl) getCache(PImpl).clear(); - + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + // Fully lazy. 
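The new getEdgeValue case above inverts InstCombine's canonical range check: from a branch on (X - C1) u< C2 it recovers X in [C1, C1+C2) on the true edge, by building the range for the compare and then subtracting the matched NegOffset (which is -C1 in the emitted add). A standalone brute-force check of that arithmetic on 8-bit values, with constants chosen so C1+C2 does not wrap (ConstantRange handles the wrapping cases in the real code):

    #include <cstdio>
    #include <stdint.h>

    int main() {
      const uint8_t C1 = 10, C2 = 20;          // claim: X is in [10, 30)
      const uint8_t NegOffset = (uint8_t)-C1;  // InstCombine emits X + (-C1)
      int Mismatches = 0;
      for (int I = 0; I != 256; ++I) {
        uint8_t X = (uint8_t)I;
        // The condition InstCombine produces:
        bool Cond = (uint8_t)(X + NegOffset) < C2;
        // The range LazyValueInfo now infers on the true edge:
        bool InRange = X >= C1 && X < (uint8_t)(C1 + C2);
        if (Cond != InRange)
          ++Mismatches;
      }
      std::printf("mismatches=%d\n", Mismatches); // prints 0
      return 0;
    }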
return false; } +void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfo>(); +} + void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { @@ -1061,7 +1053,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, // If we know the value is a constant, evaluate the conditional. Constant *Res = 0; if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD, + TLI); if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) return ResCI->isZero() ? False : True; return Unknown; @@ -1102,13 +1095,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return True; } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 38d677d..83bdf52 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -44,6 +44,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/IntrinsicInst.h" @@ -103,6 +104,7 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; std::string Messages; raw_string_ostream MessagesStr; @@ -117,6 +119,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addRequired<DominatorTree>(); } virtual void print(raw_ostream &O, const Module *M) const {} @@ -149,6 +152,7 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", @@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); @@ -411,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, if (Align != 0) { unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -471,9 +475,8 @@ static bool isZero(Value *V, TargetData *TD) { if (isa<UndefValue>(V)) return true; unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), 
KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, TD); return KnownZero.isAllOnesValue(); } @@ -614,10 +617,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, TD, DT)) + if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) if (W != V) return findValueImpl(W, OffsetOk, Visited); } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0e6bcbf..873a275 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Operator.h" using namespace llvm; @@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. If /// it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA) { + AliasAnalysis *AA, + MDNode **TBAATag) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; // If we're using alias analysis to disambiguate get the size of *Ptr. @@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); return LI; + } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); return SI->getOperand(0); + } // If Ptr is an alloca and this is a store to a different alloca, ignore // the store. This is a trivial form of alias analysis that is important diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index 3997ac4..463269d 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -91,8 +91,6 @@ static Value *GetPointerOperand(Value *I) { if (StoreInst *i = dyn_cast<StoreInst>(I)) return i->getPointerOperand(); llvm_unreachable("Value is no load or store instruction!"); - // Never reached. 
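FindAvailableLoadedValue above scans backwards from the given point for a load or store to an equivalent address, and with this patch can also report the access's TBAA tag through an optional out-parameter that is left untouched when there is nothing to report. A toy version of the scan and that out-parameter contract (record types and the address check are simplified; the real code also budgets MaxInstsToScan and can consult alias analysis):

    #include <cstdio>
    #include <vector>

    struct MDNode { int Tag; };

    struct Access {
      bool IsStore;     // a store forwards its operand, a load its result
      const void *Ptr;
      int Value;
      MDNode *TBAATag;  // may be 0
    };

    // Walk the block in reverse looking for an access to Ptr. On a hit,
    // optionally hand back its TBAA tag; otherwise *TBAATag is untouched.
    static bool findAvailableValue(const std::vector<Access> &Block,
                                   const void *Ptr, int &Out,
                                   MDNode **TBAATag) {
      for (size_t i = Block.size(); i-- != 0;) {
        const Access &A = Block[i];
        if (A.Ptr != Ptr)
          continue; // real code: AreEquivalentAddressValues / alias queries
        if (TBAATag && A.TBAATag)
          *TBAATag = A.TBAATag;
        Out = A.Value;
        return true;
      }
      return false;
    }

    int main() {
      int X = 0;
      MDNode T = { 7 };
      std::vector<Access> BB;
      Access S = { true, &X, 5, &T }; // "store 5 to X" carrying a TBAA tag
      BB.push_back(S);
      int V = 0; MDNode *Tag = 0;
      bool Found = findAvailableValue(BB, &X, V, &Tag);
      std::printf("found=%d v=%d tag=%d\n", Found, V, Tag ? Tag->Tag : -1);
      return 0;
    }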
- return 0; } static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 85aacca..f7a60a1 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" @@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, // Test if the value is already loop-invariant. if (isLoopInvariant(I)) return true; - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; if (I->mayReadFromMemory()) return false; @@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { return 0; } -/// getTripCount - Return a loop-invariant LLVM value indicating the number of -/// times the loop will be executed. Note that this means that the backedge -/// of the loop executes N-1 times. If the trip-count cannot be determined, -/// this returns null. -/// -/// The IndVarSimplify pass transforms loops to have a form that this -/// function easily understands. -/// -Value *Loop::getTripCount() const { - // Canonical loops will end with a 'cmp ne I, V', where I is the incremented - // canonical induction variable and V is the trip count of the loop. - PHINode *IV = getCanonicalInductionVariable(); - if (IV == 0 || IV->getNumIncomingValues() != 2) return 0; - - bool P0InLoop = contains(IV->getIncomingBlock(0)); - Value *Inc = IV->getIncomingValue(!P0InLoop); - BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop); - - if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) - if (BI->isConditional()) { - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { - if (ICI->getOperand(0) == Inc) { - if (BI->getSuccessor(0) == getHeader()) { - if (ICI->getPredicate() == ICmpInst::ICMP_NE) - return ICI->getOperand(1); - } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { - return ICI->getOperand(1); - } - } - } - } - - return 0; -} - -/// getSmallConstantTripCount - Returns the trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the trip count is very large -/// (>= 2^32) -unsigned Loop::getSmallConstantTripCount() const { - Value* TripCount = this->getTripCount(); - if (TripCount) { - if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) { - // Guard against huge trip counts. - if (TripCountC->getValue().getActiveBits() <= 32) { - return (unsigned)TripCountC->getZExtValue(); - } - } - } - return 0; -} - -/// getSmallConstantTripMultiple - Returns the largest constant divisor of the -/// trip count of this loop as a normal unsigned value, if possible. This -/// means that the actual trip count is always a multiple of the returned -/// value (don't forget the trip count could very well be zero as well!). -/// -/// Returns 1 if the trip count is unknown or not guaranteed to be the -/// multiple of a constant (which is also the case if the trip count is simply -/// constant, use getSmallConstantTripCount for that case), Will also return 1 -/// if the trip count is very large (>= 2^32). 
-unsigned Loop::getSmallConstantTripMultiple() const { - Value* TripCount = this->getTripCount(); - // This will hold the ConstantInt result, if any - ConstantInt *Result = NULL; - if (TripCount) { - // See if the trip count is constant itself - Result = dyn_cast<ConstantInt>(TripCount); - // if not, see if it is a multiplication - if (!Result) - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) { - switch (BO->getOpcode()) { - case BinaryOperator::Mul: - Result = dyn_cast<ConstantInt>(BO->getOperand(1)); - break; - case BinaryOperator::Shl: - if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) - if (CI->getValue().getActiveBits() <= 5) - return 1u << CI->getZExtValue(); - break; - default: - break; - } - } - } - // Guard against huge trip counts. - if (Result && Result->getValue().getActiveBits() <= 32) { - return (unsigned)Result->getZExtValue(); - } else { - return 1; - } -} - /// isLCSSAForm - Return true if the Loop is in LCSSA form bool Loop::isLCSSAForm(DominatorTree &DT) const { // Sort the blocks vector so that we can use binary search to do quick @@ -297,6 +205,17 @@ bool Loop::isLoopSimplifyForm() const { return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); } +/// isSafeToClone - Return true if the loop body is safe to clone in practice. +/// Routines that reform the loop CFG and split edges often fail on indirectbr. +bool Loop::isSafeToClone() const { + // Return false if any loop blocks contain indirectbrs. + for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + if (isa<IndirectBrInst>((*I)->getTerminator())) + return false; + } + return true; +} + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { @@ -477,21 +396,19 @@ void UnloopUpdater::updateBlockParents() { /// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below /// their new parents. void UnloopUpdater::removeBlocksFromAncestors() { - // Remove unloop's blocks from all ancestors below their new parents. + // Remove all unloop's blocks (including those in nested subloops) from + // ancestors below the new parent loop. for (Loop::block_iterator BI = Unloop->block_begin(), BE = Unloop->block_end(); BI != BE; ++BI) { - Loop *NewParent = LI->getLoopFor(*BI); - // If this block is an immediate subloop, remove all blocks (including - // nested subloops) from ancestors below the new parent loop. - // Otherwise, if this block is in a nested subloop, skip it. - if (SubloopParents.count(NewParent)) - NewParent = SubloopParents[NewParent]; - else if (Unloop->contains(NewParent)) - continue; - + Loop *OuterParent = LI->getLoopFor(*BI); + if (Unloop->contains(OuterParent)) { + while (OuterParent->getParentLoop() != Unloop) + OuterParent = OuterParent->getParentLoop(); + OuterParent = SubloopParents[OuterParent]; + } // Remove blocks from former Ancestors except Unloop itself which will be // deleted. 
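The removed Loop::getSmallConstantTripMultiple above pattern-matched how the trip count was computed to extract a guaranteed constant divisor: a constant trip count divides itself, N * C is always a multiple of C, and N << k is a multiple of 2^k, with 1 as the safe fallback; a ScalarEvolution-based getSmallConstantTripCount appears later in this diff. A sketch of that divisor extraction over a toy encoding of the matched shapes (the >= 2^32 and oversized-shift guards mirror the removed code):

    #include <cstdio>
    #include <stdint.h>

    enum TripKind { TK_Constant, TK_MulByConst, TK_ShlByConst, TK_Unknown };

    // Largest divisor of the trip count provable from its defining shape;
    // 1 means "no information".
    static uint32_t tripMultiple(TripKind K, uint64_t Operand) {
      switch (K) {
      case TK_Constant:     // trip count is the constant itself
      case TK_MulByConst:   // trip count = N * C, always a multiple of C
        return (Operand >> 32) ? 1 : (uint32_t)Operand;
      case TK_ShlByConst:   // trip count = N << k, a multiple of 2^k
        return Operand < 32 ? (1u << (unsigned)Operand) : 1;
      case TK_Unknown:
        break;
      }
      return 1;
    }

    int main() {
      std::printf("%u %u %u %u\n",
                  tripMultiple(TK_Constant, 12),    // 12
                  tripMultiple(TK_MulByConst, 4),   // 4
                  tripMultiple(TK_ShlByConst, 3),   // 8
                  tripMultiple(TK_Unknown, 0));     // 1
      return 0;
    }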
- for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent; + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent; OldParent = OldParent->getParentLoop()) { assert(OldParent && "new loop is not an ancestor of the original"); OldParent->removeBlockFromLoop(*BI); diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 5ba1f40..aba700a 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,10 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -54,20 +52,6 @@ char PrintLoopPass::ID = 0; } //===----------------------------------------------------------------------===// -// DebugInfoProbe - -static DebugInfoProbeInfo *TheDebugProbe; -static void createDebugInfoProbe() { - if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, - // before static globals, thus it will be destroyed before them. - static ManagedStatic<DebugInfoProbeInfo> DIP; - TheDebugProbe = &*DIP; -} - -//===----------------------------------------------------------------------===// // LPPassManager // @@ -195,7 +179,6 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfo>(); bool Changed = false; - createDebugInfoProbe(); // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); @@ -227,21 +210,19 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); - if (TheDebugProbe) - TheDebugProbe->initialize(P, F); + { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); Changed |= P->runOnLoop(CurrentLoop, *this); } - if (TheDebugProbe) - TheDebugProbe->finalize(P, F); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index fde07ea..22414b3 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { AliasAnalysis::Location Loc = AA.getLocation(LI); MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (!LI->isUnordered()) { + if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), static_cast<BasicBlock *>(0))); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8d451c4..b145650 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -48,10 +48,10 @@ static bool isMallocCall(const CallInst *CI) { // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. 
FunctionType *FTy = Callee->getFunctionType(); - if (FTy->getNumParams() != 1) - return false; - return FTy->getParamType(0)->isIntegerTy(32) || - FTy->getParamType(0)->isIntegerTy(64); + return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == 1 && + (FTy->getParamType(0)->isIntegerTy(32) || + FTy->getParamType(0)->isIntegerTy(64)); } /// extractMallocCall - Returns the corresponding CallInst if the instruction diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 92967c0..3a544f3 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + DT = getAnalysisIfAvailable<DominatorTree>(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -321,14 +323,100 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, !TD.fitsInLegalInteger(NewLoadByteSize*8)) return 0; + if (LIOffs+NewLoadByteSize > MemLocEnd && + LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) { + // We will be reading past the location accessed by the original program. + // While this is safe in a regular build, Address Safety analysis tools + // may start reporting false warnings. So, don't do widening. + return 0; + } + // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; NewLoadByteSize <<= 1; } - - return 0; +} + +namespace { + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. + struct CapturesBefore : public CaptureTracker { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast<Instruction>(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + return true; + } + + bool captured(Use *U) { + Instruction *I = cast<Instruction>(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +AliasAnalysis::ModRefResult +MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, + const AliasAnalysis::Location &MemLoc) { + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + if (MR != AliasAnalysis::ModRef) return MR; + + // FIXME: this is really just shoring-up a deficiency in alias analysis. + // BasicAA isn't willing to spend linear time determining whether an alloca + // was captured before or after this particular call, while we are. However, + // with a smarter AA in place, this test is just wasting compile time. 
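The widening loop above (in getLoadLoadClobberFullWidthSize) keeps doubling the candidate load width until it spans the queried location, failing once the width stops being a legal integer; the new AddressSafety check also refuses any width that reads past the original location, since an address-safety checker would flag the over-read. A standalone sketch of the size search (legality reduced to a maximum byte count; names illustrative):

    #include <cstdio>
    #include <stdint.h>

    // Find a power-of-two load width, starting from the original load's,
    // that covers up to MemLocEnd when issued at LIOffs; 0 means give up.
    // NoOverread models the new AddressSafety restriction.
    static unsigned widenLoadSize(int64_t LIOffs, int64_t MemLocEnd,
                                  unsigned OrigByteSize,
                                  unsigned MaxLegalBytes, bool NoOverread) {
      unsigned NewLoadByteSize = OrigByteSize;
      for (;;) {
        if (NewLoadByteSize > MaxLegalBytes)
          return 0; // fitsInLegalInteger failed, in the real code
        if (LIOffs + NewLoadByteSize > MemLocEnd && NoOverread)
          return 0; // would read past the location: do not widen
        if (LIOffs + NewLoadByteSize >= MemLocEnd)
          return NewLoadByteSize; // wide enough
        NewLoadByteSize <<= 1;
      }
    }

    int main() {
      // A 2-byte load at offset 0 must cover bytes [0, 7): widen to 8.
      std::printf("%u\n", widenLoadSize(0, 7, 2, 8, false)); // 8
      // Under address-safety an 8-byte load over-reads byte 7: refuse.
      std::printf("%u\n", widenLoadSize(0, 7, 2, 8, true));  // 0
      return 0;
    }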
+ if (!DT) return AliasAnalysis::ModRef; + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object)) + return AliasAnalysis::ModRef; + ImmutableCallSite CS(Inst); + if (!CS.getInstruction()) return AliasAnalysis::ModRef; + + CapturesBefore CB(Inst, DT); + llvm::PointerMayBeCaptured(Object, &CB); + + if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!AA->isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) { + return AliasAnalysis::ModRef; + } + } + return AliasAnalysis::NoModRef; } /// getPointerDependencyFrom - Return the instruction on which a memory @@ -478,7 +566,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - switch (AA->getModRefInfo(Inst, MemLoc)) { + switch (getModRefInfo(Inst, MemLoc)) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 7e22ddc..38cb1c9 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" @@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) { return true; if (isa<CastInst>(Inst) && - Inst->isSafeToSpeculativelyExecute()) + isSafeToSpeculativelyExecute(Inst)) return true; if (Inst->getOpcode() == Instruction::Add && @@ -73,7 +74,6 @@ static bool VerifySubExpr(Value *Expr, errs() << *I << '\n'; llvm_unreachable("Either something is missing from InstInputs or " "CanPHITrans is wrong."); - return false; } // Validate the operands of the instruction. @@ -100,7 +100,6 @@ bool PHITransAddr::Verify() const { for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; llvm_unreachable("This is unexpected."); - return false; } // a-ok. @@ -186,7 +185,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; if (PHIIn == Cast->getOperand(0)) @@ -228,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. 
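CapturesBefore above filters capture tracking by position: a use that the query instruction dominates can only capture the pointer after that instruction runs, so it is skipped, and only the remaining uses may set Captured. A toy version of the same filter with straight-line dominance (the reachability check is dropped here; the real CaptureTracker callbacks and PointerMayBeCaptured live in CaptureTracking.h):

    #include <cstdio>

    struct Inst { int Order; }; // toy IR: one block, program order only

    // In a single block, A dominates B exactly when A executes first.
    static bool dominates(const Inst *A, const Inst *B) {
      return A->Order < B->Order;
    }

    struct CapturesBefore {
      const Inst *BeforeHere;
      bool Captured;
      explicit CapturesBefore(const Inst *I)
        : BeforeHere(I), Captured(false) {}

      // Called for each potentially capturing use.
      bool captured(const Inst *Use) {
        if (Use != BeforeHere && dominates(BeforeHere, Use))
          return false; // capture happens strictly after the query point
        Captured = true;
        return true;    // answer known; a real tracker stops the walk here
      }
    };

    int main() {
      Inst Call = { 5 }, Early = { 2 }, Late = { 9 };
      CapturesBefore CB(&Call);
      CB.captured(&Late);   // dominated by the call: ignored
      std::printf("after late use: captured=%d\n", CB.Captured);  // 0
      CB.captured(&Early);  // may run before the call: counts
      std::printf("after early use: captured=%d\n", CB.Captured); // 1
      return 0;
    }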
- if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -284,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -381,7 +380,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); if (OpVal == 0) return 0; diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 0e3b6e6..80c5222 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -386,8 +386,8 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { } TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); - if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) - || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator)) + if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) || + isa<ResumeInst>(terminator)) addEdge(currentNode, getExit(),0); currentNode->setColor(BallLarusNode::GRAY); diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index 0ae734e..0fcdfe7 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -137,22 +137,22 @@ bool PathProfileVerifier::runOnModule (Module &M) { BasicBlock* source = nextEdge->getSource(); BasicBlock* target = nextEdge->getTarget(); unsigned duplicateNumber = nextEdge->getDuplicateNumber(); - DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber - << "}--> " << target->getNameStr()); + DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber + << "}--> " << target->getName()); // Ensure all the referenced edges exist // TODO: make this a separate function if( !arrayMap.count(source) ) { - errs() << " error [" << F->getNameStr() << "()]: source '" - << source->getNameStr() + errs() << " error [" << F->getName() << "()]: source '" + << source->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source].count(target) ) { - errs() << " error [" << F->getNameStr() << "()]: target '" - << target->getNameStr() + errs() << " error [" << F->getName() << "()]: target '" + << target->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source][target].count(duplicateNumber) ) { - errs() << " error [" << F->getNameStr() << "()]: edge " - << source->getNameStr() << " -> " << target->getNameStr() + errs() << " error [" << F->getName() << "()]: edge " + << source->getName() << " -> " << target->getName() << " duplicate number " << duplicateNumber << " does not exist in the array map.\n"; } else { diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index b594e2b..63468f8 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -332,7 +332,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { // Clear 
Minimal Edges. MinimalWeight.clear(); - DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n"); + DEBUG(dbgs() << "Working on function " << F.getName() << "\n"); // Since the entry block is the first one and has no predecessors, the edge // (0,entry) is inserted with the starting weight of 1. diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 098079b..c4da807 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -160,7 +160,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -181,7 +181,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index a017518..0cb1588 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -30,7 +30,7 @@ static cl::opt<bool,false> ProfileVerifierDisableAssertions("profile-verifier-noassert", cl::desc("Disable assertions")); -namespace llvm { +namespace { template<class FType, class BType> class ProfileVerifierPassT : public FunctionPass { @@ -125,8 +125,8 @@ namespace llvm { outCount++; } } - dbgs() << "Block " << BB->getNameStr() << " in " - << BB->getParent()->getNameStr() << ":" + dbgs() << "Block " << BB->getName() << " in " + << BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",BBWeight) << "," << "inWeight=" << format("%20.20g",inWeight) << "," << "inCount=" << inCount << "," @@ -143,8 +143,8 @@ namespace llvm { template<class FType, class BType> void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { - dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " - << DI->BB->getParent()->getNameStr() << ":" + dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in " + << DI->BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," << "inWeight=" << format("%20.20g",DI->inWeight) << "," << "inCount=" << DI->inCount << "," @@ -201,13 +201,13 @@ namespace llvm { double EdgeWeight = PI->getEdgeWeight(E); if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has missing value"); return 0; } else { if (EdgeWeight < 0) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has negative value"); } return EdgeWeight; @@ -220,8 +220,8 @@ namespace llvm { DetailedBlockInfo *DI) { if (Error) { DEBUG(debugEntry(DI)); - dbgs() << "Block " << 
DI->BB->getNameStr() << " in Function " - << DI->BB->getParent()->getNameStr() << ": "; + dbgs() << "Block " << DI->BB->getName() << " in Function " + << DI->BB->getParent()->getName() << ": "; ASSERTMESSAGE(Message); } return; diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 52753cb..b507b1e 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -186,18 +186,16 @@ std::string Region::getNameStr() const { raw_string_ostream OS(entryName); WriteAsOperand(OS, getEntry(), false); - entryName = OS.str(); } else - entryName = getEntry()->getNameStr(); + entryName = getEntry()->getName(); if (getExit()) { if (getExit()->getName().empty()) { raw_string_ostream OS(exitName); WriteAsOperand(OS, getExit(), false); - exitName = OS.str(); } else - exitName = getExit()->getNameStr(); + exitName = getExit()->getName(); } else exitName = "<Function Return>"; @@ -652,7 +650,7 @@ void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { // This basic block is a start block of a region. It is already in the // BBtoRegion relation. Only the child basic blocks have to be updated. if (it != BBtoRegion.end()) { - Region *newRegion = it->second;; + Region *newRegion = it->second; region->addSubRegion(getTopMostParent(newRegion)); region = newRegion; } else { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index e0ac56c..1d55642 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -74,6 +74,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; @@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const { OS << OpStr; } OS << ")"; + switch (NAry->getSCEVType()) { + case scAddExpr: + case scMulExpr: + if (NAry->getNoWrapFlags(FlagNUW)) + OS << "<nuw>"; + if (NAry->getNoWrapFlags(FlagNSW)) + OS << "<nsw>"; + } return; } case scUDivExpr: { @@ -249,11 +259,9 @@ Type *SCEV::getType() const { return cast<SCEVUnknown>(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return 0; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return 0; } bool SCEV::isZero() const { @@ -274,6 +282,20 @@ bool SCEV::isAllOnesValue() const { return false; } +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +bool SCEV::isNonConstantNegative() const { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). 
+ return SC->getValue()->getValue().isNegative(); +} + SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} @@ -587,11 +609,8 @@ namespace { } default: - break; + llvm_unreachable("Unknown SCEV kind!"); } - - llvm_unreachable("Unknown SCEV kind!"); - return 0; } }; } @@ -2581,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2590,7 +2609,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2607,7 +2626,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2617,7 +2636,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -3108,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, TD, DT)) + if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3168,7 +3187,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // Add the total offset from all the GEP indices to the base. return getAddExpr(BaseS, TotalOffset, - isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); + isInBounds ? SCEV::FlagNUW : SCEV::FlagAnyWrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3242,9 +3261,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. 
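// A minimal sketch of the mask-free query used below (BW and V stand in for
// the bit width and value): the known-zero bits directly yield the minimum
// trailing-zero count, with no all-ones Mask argument left to thread through:
//   APInt Zeros(BW, 0), Ones(BW, 0);
//   ComputeMaskedBits(V, Zeros, Ones);
//   unsigned MinTZ = Zeros.countTrailingOnes();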
unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + ComputeMaskedBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3382,9 +3400,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3584,6 +3601,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. + // + // Don't apply this instruction's NSW or NUW flags to the new + // expression. The instruction may be guarded by control flow that the + // no-wrap behavior depends on. Non-control-equivalent instructions can be + // mapped to the same SCEV expression, and it would be incorrect to transfer + // NSW/NUW semantics to those operations. SmallVector<const SCEV *, 4> AddOps; AddOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { @@ -3598,16 +3621,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { AddOps.push_back(Op1); } AddOps.push_back(getSCEV(U->getOperand(0))); - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V); - if (OBO->hasNoSignedWrap()) - setFlags(Flags, SCEV::FlagNSW); - if (OBO->hasNoUnsignedWrap()) - setFlags(Flags, SCEV::FlagNUW); - return getAddExpr(AddOps, Flags); + return getAddExpr(AddOps); } case Instruction::Mul: { - // See the Add code above. + // Don't transfer NSW/NUW for the same reason as AddExpr. SmallVector<const SCEV *, 4> MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); @@ -3641,9 +3658,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); @@ -3915,13 +3931,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the maximum trip count is very large -/// (>= 2^32) -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, - BasicBlock *ExitBlock) { +/// normal unsigned value. Returns 0 if the trip count is unknown or not +/// constant. Will also return 0 if the maximum trip count is very large (>= +/// 2^32). +/// +/// This "trip count" assumes that control exits via ExitingBlock. More +/// precisely, it is the number of times that control may reach ExitingBlock +/// before taking the branch. 
For loops with multiple exits, it may not be the +/// number of times that the loop header executes because the loop may exit +/// prematurely via another branch. +unsigned ScalarEvolution:: +getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { const SCEVConstant *ExitCount = - dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock)); + dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); if (!ExitCount) return 0; @@ -3944,9 +3966,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), will also return 1 /// if the trip count is very large (>= 2^32). -unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, - BasicBlock *ExitBlock) { - const SCEV *ExitCount = getExitCount(L, ExitBlock); +/// +/// As explained in the comments for getSmallConstantTripCount, this assumes +/// that control exits the loop via ExitingBlock. +unsigned ScalarEvolution:: +getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { + const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; @@ -4153,13 +4178,19 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. If all exits are computable, this is the minimum computed count. +/// exits. A computable result can only be returned for loops with a single +/// exit. Returning the minimum taken count among all exits is incorrect +/// because one of the loop's exit limits may have been skipped. HowFarToZero +/// assumes that the limit of each loop test is never skipped. This is a valid +/// assumption as long as the loop exits via that test. For precise results, it +/// is the caller's responsibility to specify the relevant loop exit using +/// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); - // We need at least one computable exit. + // We need exactly one computable exit. if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); @@ -4171,8 +4202,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!BECount) BECount = ENT->ExactNotTaken; - else - BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + else if (BECount != ENT->ExactNotTaken) + return SE->getCouldNotCompute(); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; @@ -4253,8 +4284,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (MaxBECount == getCouldNotCompute()) MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); + else if (EL.Max != getCouldNotCompute()) { + // We cannot take the "min" MaxBECount, because non-unit stride loops may + // skip some loop tests. Taking the max over the exits is sufficiently + // conservative. TODO: We could do better taking into consideration + // that (1) the loop has unit stride (2) the last loop test is + // less-than/greater-than (3) any loop test is less-than/greater-than AND + // falls-through some constant times less than the other tests.
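// For example (hypothetical loop): with i = 0, 3, 6, ... one exit tests
// (i == 4) and another tests (i >= 10). The first test never fires, so a
// limit computed from it can be smaller than the 4 backedges actually taken
// before the second exit triggers at i == 12; taking the min would
// underestimate, while the max stays a conservative upper bound.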
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + } } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); @@ -4539,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast<SCEVConstant>(Val)->getValue(); } -/// GetAddressedElementFromGlobal - Given a global variable with an initializer -/// and a GEP expression (missing the pointer index) indexing into it, return -/// the addressed element of the initializer or null if the index expression is -/// invalid. -static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, - const std::vector<ConstantInt*> &Indices) { - Constant *Init = GV->getInitializer(); - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - uint64_t Idx = Indices[i]->getZExtValue(); - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { - assert(Idx < CS->getNumOperands() && "Bad struct index!"); - Init = cast<Constant>(CS->getOperand(Idx)); - } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { - if (Idx >= CA->getNumOperands()) return 0; // Bogus program - Init = cast<Constant>(CA->getOperand(Idx)); - } else if (isa<ConstantAggregateZero>(Init)) { - if (StructType *STy = dyn_cast<StructType>(Init->getType())) { - assert(Idx < STy->getNumElements() && "Bad struct index!"); - Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { - if (Idx >= ATy->getNumElements()) return 0; // Bogus program - Init = Constant::getNullValue(ATy->getElementType()); - } else { - llvm_unreachable("Unknown constant aggregate type!"); - } - return 0; - } else { - return 0; // Unknown initializer type - } - } - return Init; -} - /// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. @@ -4600,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Okay, we allow one non-constant index into the GEP instruction. Value *VarIdx = 0; - std::vector<ConstantInt*> Indexes; + std::vector<Constant*> Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { @@ -4612,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Indexes.push_back(0); } + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); @@ -4634,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), + Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. @@ -4658,7 +4667,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( /// specified type, assuming that all operands were constants. 
static bool CanConstantFold(const Instruction *I) { if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || - isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I)) + isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || + isa<LoadInst>(I)) return true; if (const CallInst *CI = dyn_cast<CallInst>(I)) @@ -4748,16 +4758,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap<Instruction *, Constant *> &Vals, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast<Constant>(V)) return C; + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return 0; - Instruction *I = cast<Instruction>(V); if (Constant *C = Vals.lookup(I)) return C; - assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant"); - assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop"); - (void)L; + // An instruction inside the loop depends on a value outside the loop that we + // weren't given a mapping for, or a value such as a call inside the loop. + if (!canConstantEvolve(I, L)) return 0; + + // An unmapped PHI can be due to a branch or another loop inside this loop, + // or due to this not being the initial iteration through a loop where we + // couldn't compute the evolution of this particular PHI last time. + if (isa<PHINode>(I)) return 0; std::vector<Constant*> Operands(I->getNumOperands()); @@ -4768,16 +4785,21 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (!Operands[i]) return 0; continue; } - Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); Vals[Operand] = C; if (!C) return 0; Operands[i] = C; } - if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], TD); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); + Operands[1], TD, TLI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Operands[0], TD); + } + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + TLI); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4798,23 +4820,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; - // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis. DenseMap<Instruction *, Constant *> CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) - return RetVal = 0; // Must be a constant. 
- CurrentIterVals[PN] = StartCST; + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast<PHINode>(I)); ++I) { + Constant *StartCST = + dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return RetVal = 0; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa<Constant>(BEValue)) - return RetVal = 0; // Not derived from same PHI. // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) @@ -4826,15 +4851,46 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! - // Compute the value of the PHI node for the next iteration. + // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - if (NextPHI == CurrentIterVals[PN]) - return RetVal = NextPHI; // Stopped evolving! + DenseMap<Instruction *, Constant *> NextIterVals; + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + TLI); if (NextPHI == 0) return 0; // Couldn't evaluate! - DenseMap<Instruction *, Constant *> NextIterVals; NextIterVals[PN] = NextPHI; + + bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; + + // Also evaluate the other PHI nodes. However, we don't get to stop if we + // cease to be able to evaluate one of them or if they stop evolving, + // because that doesn't necessarily prevent us from computing PN. + SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; + for (DenseMap<Instruction *, Constant *>::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I) { + PHINode *PHI = dyn_cast<PHINode>(I->first); + if (!PHI || PHI == PN || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + } + // We use two distinct loops because EvaluateExpression may invalidate any + // iterators into CurrentIterVals. + for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator + I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = I->first; + Constant *&NextPHI = NextIterVals[PHI]; + if (!NextPHI) { // Not already computed. + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + if (NextPHI != I->second) + StoppedEvolving = false; + } + + // If all entries in CurrentIterVals == NextIterVals then we can stop + // iterating; the loop can't continue to change. + if (StoppedEvolving) + return RetVal = CurrentIterVals[PN]; + CurrentIterVals.swap(NextIterVals); } } @@ -4844,9 +4900,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, /// try to evaluate a few iterations of the loop until the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return getCouldNotCompute().
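/// (For instance, now that CanConstantFold accepts loads, a hypothetical loop
/// whose exit condition compares elements loaded from a constant global array
/// can sometimes be resolved by this brute-force evaluation even though its
/// trip count is not expressible as an affine recurrence.)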
-const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, - Value *Cond, - bool ExitWhen) { +const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); @@ -4854,29 +4910,33 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + DenseMap<Instruction *, Constant *> CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. - - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa<Constant>(BEValue)) - return getCouldNotCompute(); // Not derived from same PHI. + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast<PHINode>(I)); ++I) { + Constant *StartCST = + dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); // Okay, we found a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (Constant *PHIVal = StartCST; - IterationNum != MaxIterations; ++IterationNum) { - DenseMap<Instruction *, Constant *> PHIValMap; - PHIValMap[PN] = PHIVal; + for (unsigned IterationNum = 0; IterationNum != MaxIterations; ++IterationNum) { ConstantInt *CondVal = - dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD)); + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals, + TD, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4886,11 +4946,29 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); - if (NextPHI == 0 || NextPHI == PHIVal) - return getCouldNotCompute();// Couldn't evaluate or not making progress... - PHIVal = NextPHI; + // Update all the PHI nodes for the next iteration. + DenseMap<Instruction *, Constant *> NextIterVals; + + // Create a list of which PHIs we need to compute. We want to do this before + // calling EvaluateExpression on them because that may invalidate iterators + // into CurrentIterVals.
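// (EvaluateExpression memoizes results for non-phi instructions in
// CurrentIterVals, and a DenseMap insertion may grow and rehash the table,
// invalidating any outstanding iterators; hence the two-pass scheme here.)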
+ SmallVector<PHINode *, 8> PHIsToCompute; + for (DenseMap<Instruction *, Constant *>::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast<PHINode>(I->first); + if (!PHI || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(PHI); + } + for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(), + E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = *I; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! + + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. @@ -4921,6 +4999,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { return C; } +/// This builds up a Constant using the ConstantExpr interface. That way, we +/// will return Constants for objects which aren't represented by a +/// SCEVConstant, because SCEVConstant is restricted to ConstantInt. +/// Returns NULL if the SCEV isn't representable as a Constant. +static Constant *BuildConstantFromSCEV(const SCEV *V) { + switch (V->getSCEVType()) { + default: // TODO: smax, umax. + case scCouldNotCompute: + case scAddRecExpr: + break; + case scConstant: + return cast<SCEVConstant>(V)->getValue(); + case scUnknown: + return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue()); + case scSignExtend: { + const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) + return ConstantExpr::getSExt(CastOp, SS->getType()); + break; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) + return ConstantExpr::getZExt(CastOp, SZ->getType()); + break; + } + case scTruncate: { + const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V); + if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) + return ConstantExpr::getTrunc(CastOp, ST->getType()); + break; + } + case scAddExpr: { + const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { + if (C->getType()->isPointerTy()) + C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); + if (!C2) return 0; + + // First pointer! + if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + std::swap(C, C2); + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. + C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + } + + // Don't bother trying to sum two pointers. We probably can't + // statically compute a load that results from it anyway. + if (C2->getType()->isPointerTy()) + return 0; + + if (C->getType()->isPointerTy()) { + if (cast<PointerType>(C->getType())->getElementType()->isStructTy()) + C2 = ConstantExpr::getIntegerCast( + C2, Type::getInt32Ty(C->getContext()), true); + C = ConstantExpr::getGetElementPtr(C, C2); + } else + C = ConstantExpr::getAdd(C, C2); + } + return C; + } + break; + } + case scMulExpr: { + const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); + if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { + // Don't bother with pointers at all. 
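// (e.g. a hypothetical product like (3 * @gv) over a global's address has no
// constant folding that could feed a later load, unlike the pointer-plus-
// byte-offset sums handled in the scAddExpr case above.)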
+ if (C->getType()->isPointerTy()) return 0; + for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); + if (!C2 || C2->getType()->isPointerTy()) return 0; + C = ConstantExpr::getMul(C, C2); + } + return C; + } + break; + } + case scUDivExpr: { + const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); + if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) + if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) + if (LHS->getType() == RHS->getType()) + return ConstantExpr::getUDiv(LHS, RHS); + break; + } + } + return 0; +} + const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (isa<SCEVConstant>(V)) return V; @@ -4973,11 +5143,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *OpV = getSCEVAtScope(OrigV, L); MadeImprovement |= OrigV != OpV; - Constant *C = 0; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) - C = SC->getValue(); - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) - C = dyn_cast<Constant>(SU->getValue()); + Constant *C = BuildConstantFromSCEV(OpV); if (!C) return V; if (C->getType() != Op->getType()) C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, @@ -4992,10 +5158,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C = 0; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); - else + Operands[0], Operands[1], TD, + TLI); + else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD); + Operands, TD, TLI); if (!C) return V; return getSCEV(C); } @@ -5113,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } llvm_unreachable("Unknown SCEV type!"); - return 0; } /// getSCEVAtScope - This is a convenience function which does @@ -5350,10 +5519,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // behavior. Loops must exhibit defined behavior until a wrapped value is // actually used. So the trip count computed by udiv could be smaller than the // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { // FIXME: We really want an "isexact" bit for udiv. return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - + } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), @@ -5744,7 +5913,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - break; case ICmpInst::ICMP_SGT: Pred = ICmpInst::ICMP_SLT; std::swap(LHS, RHS); @@ -6089,8 +6257,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) : - AddRec->getNoWrapFlags(SCEV::FlagNUW); + bool NoWrap = isSigned ? 
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) : + AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW)); if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); @@ -6381,6 +6550,7 @@ bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); return false; } @@ -6417,6 +6587,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<LoopInfo>(); AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -6592,11 +6763,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return LoopVariant; - default: break; + default: llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return LoopVariant; } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { @@ -6678,11 +6846,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return DoesNotDominateBlock; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return DoesNotDominateBlock; } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { @@ -6728,11 +6894,9 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return false; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return false; } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 47f0f32..69507be 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -19,6 +19,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -30,6 +31,19 @@ using namespace llvm; Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know that the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. + BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Instruction *Ret = NULL; + // Check to see if there is already a cast!
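// (Illustration of the precondition: if the builder sits at BIP and a cast is
// requested at IP, then IP dominates BIP, so the cast returned here dominates
// anything the caller subsequently inserts at BIP.)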
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { @@ -37,27 +51,35 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, if (U->getType() == Ty) if (CastInst *CI = dyn_cast<CastInst>(U)) if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, fix it. - if (BasicBlock::iterator(CI) != IP) { + // If the cast isn't where we want it, create a new cast at IP. + // Likewise, do not reuse a cast at BIP because it must dominate + // instructions that might be inserted before BIP. + if (BasicBlock::iterator(CI) != IP || BIP == IP) { // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); + Ret = CastInst::Create(Op, V, Ty, "", IP); + Ret->takeName(CI); + CI->replaceAllUsesWith(Ret); CI->setOperand(0, UndefValue::get(V->getType())); - rememberInstruction(NewCI); - return NewCI; + break; } - rememberInstruction(CI); - return CI; + Ret = CI; + break; } } // Create a new cast. - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(I); - return I; + if (!Ret) + Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(SE.DT->dominates(Ret, BIP)); + + rememberInstruction(Ret); + return Ret; } /// InsertNoopCastOfTo - Insert a cast of V to the specified type, @@ -73,9 +95,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { "InsertNoopCastOfTo cannot change sizes!"); // Short-circuit unnecessary bitcasts. - if (Op == Instruction::BitCast && V->getType() == Ty) - return V; - + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } // Short-circuit unnecessary inttoptr<->ptrtoint casts. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { @@ -115,8 +142,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) || - isa<LandingPadInst>(IP)) + while (isa<PHINode>(IP) || isa<LandingPadInst>(IP)) ++IP; return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -492,6 +518,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, V = InsertNoopCastOfTo(V, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + assert(!isa<Instruction>(V) || + SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint())); + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); @@ -588,20 +617,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, return expand(SE.getAddExpr(Ops)); } -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *F) { - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F); - if (!Mul) return false; - - // If there is a constant factor, it will be first. 
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for /// SCEV expansion. If they are nested, this is the most nested. If they are /// neighboring, pick the later. @@ -655,7 +670,6 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return RelevantLoops[D] = Result; } llvm_unreachable("Unexpected SCEV type!"); - return 0; } namespace { @@ -680,10 +694,10 @@ public: // If one operand is a non-constant negative and the other is not, // put the non-constant negative on the right so that a sub can // be used instead of a negate and add. - if (isNonConstantNegative(LHS.second)) { - if (!isNonConstantNegative(RHS.second)) + if (LHS.second->isNonConstantNegative()) { + if (!RHS.second->isNonConstantNegative()) return false; - } else if (isNonConstantNegative(RHS.second)) + } else if (RHS.second->isNonConstantNegative()) return true; // Otherwise they are equivalent according to this comparison. @@ -744,7 +758,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { for (++I; I != E && I->first == CurLoop; ++I) NewOps.push_back(I->second); Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); - } else if (isNonConstantNegative(Op)) { + } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); Sum = InsertNoopCastOfTo(Sum, Ty); @@ -875,58 +889,138 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, return isNormalAddRecExprPHI(PN, IncV, L); } -/// Determine if this cyclic phi is in a form that would have been generated by -/// LSR. We don't care if the phi was actually expanded in this pass, as long -/// as it is in a low-cost form, for example, no implied multiplication. This -/// should match any patterns generated by getAddRecExprPHILiterally and -/// expandAddtoGEP. -bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, - const Loop *L) { +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return NULL; + switch (IncV->getOpcode()) { + default: + return NULL; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: - case Instruction::Sub: - return IncV->getOperand(0) == PN - && L->isLoopInvariant(IncV->getOperand(1)); + case Instruction::Sub: { + Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); + if (!OInst || SE.DT->dominates(OInst, InsertPos)) + return dyn_cast<Instruction>(IncV->getOperand(0)); + return NULL; + } case Instruction::BitCast: - IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0)); - if (!IncV) - return false; - // fall-thru to GEP handling - case Instruction::GetElementPtr: { - // This must be a pointer addition of constants (pretty) or some number of - // address-size elements (ugly). 
+ return dyn_cast<Instruction>(IncV->getOperand(0)); + case Instruction::GetElementPtr: for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); I != E; ++I) { if (isa<Constant>(*I)) continue; - // ugly geps have 2 operands. - // i1* is used by the expander to represent an address-size element. + if (Instruction *OInst = dyn_cast<Instruction>(*I)) { + if (!SE.DT->dominates(OInst, InsertPos)) + return NULL; + } + if (allowScale) { + // Allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. if (IncV->getNumOperands() != 2) - return false; + return NULL; unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return false; - // Ensure the operands dominate the insertion point. I don't know of a - // case when this would not be true, so this is somewhat untested. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) - return false; - } + return NULL; break; } - IncV = dyn_cast<Instruction>(IncV->getOperand(0)); - if (IncV && IncV->getOpcode() == Instruction::BitCast) - IncV = dyn_cast<Instruction>(IncV->getOperand(0)); - return IncV == PN; + return dyn_cast<Instruction>(IncV->getOperand(0)); } - default: +} + +/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT->dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector<Instruction*, 4> IVIncs; + for (;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT->dominates(IncV, InsertPos)) + break; + } + for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(), + E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); + } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for (Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); + } + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + return IncV; } /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand @@ -956,26 +1050,28 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (LSRMode) { if (!isExpandedAddRecExprPHI(PN, IncV, L)) continue; + if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) + continue; } else { if (!isNormalAddRecExprPHI(PN, IncV, L)) continue; + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); } // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); // Remember the increment. rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast<Instruction>(IncV->getOperand(0)); - } while (IncV != PN); return PN; } } @@ -984,6 +1080,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, + // so it's not worth implementing SmallPtrSet::swap. + PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + // Expand code for the start value. 
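// (A concrete instance of the recursion described above, with a made-up
// recurrence: for a quadratic addrec such as {0,+,{1,+,1}<L>}<L>, the step
// {1,+,1}<L> is itself an addrec of the same loop L, so it must be expanded
// while L is absent from PostIncLoops.)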
Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, L->getHeader()->begin()); @@ -993,16 +1099,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), L->getHeader())); - // Expand code for the step value. Insert instructions right before the - // terminator corresponding to the back-edge. Do this before creating the PHI - // so that PHI reuse code doesn't see an incomplete PHI. If the stride is - // negative, insert a sub instead of an add for the increment (unless it's a - // constant, because subtracts of constants are canonicalized to adds). + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = ExpandTy->isPointerTy(); - bool isNegative = !isPointer && isNonConstantNegative(Step); - if (isNegative) + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. @@ -1023,33 +1129,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, continue; } - // Create a step value and add it to the PHI. If IVIncInsertLoop is - // non-null and equal to the addrec's loop, insert the instructions - // at IVIncInsertPos. + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); - Value *IncV; - // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (isPointer) { - PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); - // If the step isn't constant, don't use an implicitly scaled GEP, because - // that would require a multiply inside the loop. - if (!isa<ConstantInt>(StepV)) - GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), - GEPPtrTy->getAddressSpace()); - const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; - IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } - } else { - IncV = isNegative ? - Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : - Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); - } + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + PN->addIncoming(IncV, Pred); } @@ -1057,6 +1144,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (SaveInsertBB) restoreInsertPoint(SaveInsertBB, SaveInsertPt); + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + // Remember this PHI, even in post-inc mode. 
InsertedValues.insert(PN); @@ -1124,10 +1215,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - assert((!isa<Instruction>(Result) || - SE.DT->dominates(cast<Instruction>(Result), - Builder.GetInsertPoint())) && - "postinc expansion does not dominate use"); + if (isa<Instruction>(Result) + && !SE.DT->dominates(cast<Instruction>(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outside the loop whose operand is replaced + // during expansion with the value of the postinc user. Without + // fundamentally changing the way postinc users are tracked, the only + // remedy is inserting an extra IV increment. StepV might fold into + // PostLoopOffset, but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // Restore the insertion point to the place that the caller has + // determined dominates all uses. + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } } // Re-apply any non-loop-dominating scale. @@ -1363,10 +1475,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) - ++IP; + Instruction *IP) { Builder.SetInsertPoint(IP->getParent(), IP); return expandCodeFor(SH, Ty); } @@ -1392,14 +1501,23 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = L->getHeader()->getFirstInsertionPt(); + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstInsertionPt(); - while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa<DbgInfoIntrinsic>(InsertPt))) { InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } break; } @@ -1434,23 +1552,9 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedPostIncValues.insert(I); else InsertedValues.insert(I); - - // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that - // subsequently inserted code will be dominated. - if (Builder.GetInsertPoint() == I) { - BasicBlock::iterator It = cast<Instruction>(I); - do { ++It; } while (isInsertedInstruction(It) || - isa<DbgInfoIntrinsic>(It)); - Builder.SetInsertPoint(Builder.GetInsertBlock(), It); - } } void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - // If we acquired more instructions since the old insert point was saved, - // advance past them. - while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I; - Builder.SetInsertPoint(BB, I); } @@ -1478,40 +1582,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } -/// hoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -/// -/// This does not require a SCEVExpander instance and could be replaced by a -/// general code-insertion helper. -bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) { - if (DT->dominates(IncV, InsertPos)) - return true; - - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast<Instruction>(OI); - if (OInst && !DT->dominates(OInst, InsertPos)) - return false; - } - IncV->moveBefore(InsertPos); - return true; +/// Sort values by integer width for replaceCongruentIVs. +static bool width_descending(Value *lhs, Value *rhs) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) + return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); + return rhs->getType()->getPrimitiveSizeInBits() + < lhs->getType()->getPrimitiveSizeInBits(); } /// replaceCongruentIVs - Check for congruent phis in this loop header and /// replace them with their most canonical representative. Return the number /// of phis eliminated. /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakVH> &DeadInsts, + const TargetLowering *TLI) { + // Find integer phis in order of decreasing width.
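// (Rationale, briefly: a narrow congruent phi can be rewritten as a
// truncation of a wider one, and the TLI->isTruncateFree hook tells us when
// that truncation costs nothing; the reverse direction would require
// widening arithmetic instead.)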
+ SmallVector<PHINode*, 8> Phis; + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast<PHINode>(I); ++I) { + Phis.push_back(Phi); + } + if (TLI) + std::sort(Phis.begin(), Phis.end(), width_descending); + unsigned NumElim = 0; DenseMap<const SCEV *, PHINode *> ExprToIVMap; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - PHINode *Phi = cast<PHINode>(I); + // Process phis from wide to narrow. Map wide phis to their truncation + // so narrow phis can reuse them. + for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(), + PEnd = Phis.end(); PIter != PEnd; ++PIter) { + PHINode *Phi = *PIter; + if (!SE.isSCEVable(Phi->getType())) continue; PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TLI + && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } continue; } - // If one phi derives from the other via GEPs, types may differ. - // We could consider adding a bitcast here to handle it. - if (OrigPhiRef->getType() != Phi->getType()) + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense. + if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) continue; if (BasicBlock *LatchBlock = L->getLoopLatch()) { @@ -1546,32 +1645,56 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Instruction *IsomorphicInc = cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); - // If this phi is more canonical, swap it with the original. - if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) - && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + // If this phi has the same width but is more canonical, replace the + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } // Replacing the congruent phi is sufficient because acyclic redundancy // elimination, CSE/GVN, should handle the rest. However, once SCEV proves // that a phi is congruent, it's often the head of an IV user cycle that - // is isomorphic with the original phi. So it's worth eagerly cleaning up - // the common case of a single IV increment. - if (OrigInc != IsomorphicInc && - OrigInc->getType() == IsomorphicInc->getType() && - SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && - hoistStep(OrigInc, IsomorphicInc, DT)) { + // is isomorphic with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs can remove + // cycles that had postinc uses.
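// (For example, an i32 increment congruent to an i64 one can be replaced by
// a trunc of the i64 increment, leaving the dead i32 phi/increment cycle for
// DeleteDeadPHIs to collect.)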
+ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), + IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc + && TruncExpr == SE.getSCEV(IsomorphicInc) + && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); - IsomorphicInc->replaceAllUsesWith(OrigInc); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = isa<PHINode>(OrigInc) + ? (Instruction*)L->getHeader()->getFirstInsertionPt() + : OrigInc->getNextNode(); + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder. + CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); DeadInsts.push_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); ++NumElim; - Phi->replaceAllUsesWith(OrigPhiRef); + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); DeadInsts.push_back(Phi); } return NumElim; diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index c66ecd6..dd2ed4f 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -118,7 +118,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Conservatively use AnyWrap until/unless we need FlagNW. const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { - default: llvm_unreachable("Unexpected transform name!"); case NormalizeAutodetect: if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { const SCEV *TransformedStep = @@ -191,7 +190,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { } llvm_unreachable("Unexpected SCEV kind!"); - return 0; } /// Manage recursive transformation across an expression DAG. 
Revisiting diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index d8c207b..c819666 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -194,8 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs.assign(TI.getNumSuccessors(), true); return; } - - Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true; + SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C)); + Succs[Case.getSuccessorIndex()] = true; } @@ -327,13 +327,13 @@ void SparseSolver::Solve(Function &F) { } void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + OS << "\nFUNCTION: " << F.getName() << "\n"; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BBExecutable.count(BB)) OS << "INFEASIBLE: "; OS << "\t"; if (BB->hasName()) - OS << BB->getNameStr() << ":\n"; + OS << BB->getName() << ":\n"; else OS << "; anon bb\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 68a39cd..ff5010b 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -34,7 +34,7 @@ Module *Trace::getModule() const { /// void Trace::print(raw_ostream &O) const { Function *F = getFunction(); - O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; + O << "; Trace from function " << F->getName() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; WriteAsOperand(O, *i, true, getModule()); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4d94f61..a430f62 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -20,8 +20,10 @@ #include "llvm/GlobalAlias.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" @@ -41,10 +43,176 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { return TD ? TD->getPointerSizeInBits() : 0; } -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bit sets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. +static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + if (!Add) { + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (!CLHS->getValue().isNegative()) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. 
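The C - X special case above lends itself to an exhaustive sanity check at a small width (standalone C++; the constant C = 20 and the 8-bit width are illustrative choices, not from the patch). With C = 20, the premise "the high 4 bits of X are known zero" means X < 16, and the claim is that the top clz(20) = 3 bits of C - X are then always clear:

#include <cstdint>
#include <cstdio>

int main() {
  // C - X with C = 20: when X < 16, the difference lies in (4, 20],
  // so its top 3 bits (mask 0xE0) can never be set.
  const uint8_t C = 20;
  for (uint8_t X = 0; X < 16; ++X)
    if ((uint8_t)(C - X) & 0xE0) {
      std::printf("counterexample at X = %u\n", X);
      return 1;
    }
  std::printf("top 3 bits of 20 - X are zero for all X in [0, 16)\n");
  return 0;
}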
+ if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + } + } + } + } + + unsigned BitWidth = KnownZero.getBitWidth(); + + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + + // Are we still trying to solve for the sign bit? + if (!KnownZero.isNegative() && !KnownOne.isNegative()) { + if (NSW) { + if (Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } +} + +static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + unsigned BitWidth = KnownZero.getBitWidth(); + ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. + if (NSW) { + if (Op0 == Op1) { + // The product of a number with itself is non-negative. 
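The trailing-zero propagation in ComputeMaskedBitsAddSub is plain carry arithmetic, and brute force confirms it at 8 bits (standalone C++; the choice of k = 4 low zero bits is arbitrary): when one addend has its low 4 bits known zero, no carry can enter those positions, so they come straight from the other addend:

#include <cstdint>
#include <cstdio>

int main() {
  // If B has its low 4 bits known zero, the low 4 bits of A + B are
  // exactly the low 4 bits of A; no carry can propagate into them.
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; B += 16) { // low 4 bits of B are zero
      uint8_t Sum = (uint8_t)(A + B);
      if ((Sum & 0x0F) != (A & 0x0F)) {
        std::printf("counterexample: A=%u B=%u\n", A, B);
        return 1;
      }
    }
  std::printf("low bits of A survive A + B whenever B's low bits are zero\n");
  return 0;
}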
+ isKnownNonNegative = true; + } else { + bool isKnownNonNegativeOp1 = KnownZero.isNegative(); + bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); + bool isKnownNegativeOp1 = KnownOne.isNegative(); + bool isKnownNegativeOp0 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + // The product of a negative number and a non-negative number is either + // negative or zero. + if (!isKnownNonNegative) + isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && + isKnownNonZero(Op0, TD, Depth)) || + (isKnownNegativeOp0 && isKnownNonNegativeOp1 && + isKnownNonZero(Op1, TD, Depth)); + } + } + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + + // Only make use of no-wrap flags if we failed to compute the sign bit + // directly. This matters if the multiplication always overflows, in + // which case we prefer to follow the result of the direct computation, + // though as the program is invoking undefined behaviour we can choose + // whatever we like here. + if (isKnownNonNegative && !KnownOne.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative && !KnownZero.isNegative()) + KnownOne.setBit(BitWidth - 1); +} + +void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NumRanges = Ranges.getNumOperands() / 2; + assert(NumRanges >= 1); + + // Use the high end of the ranges to find leading zeros. + unsigned MinLeadingZeros = BitWidth; + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0)); + ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1)); + ConstantRange Range(Lower->getValue(), Upper->getValue()); + if (Range.isWrappedSet()) + MinLeadingZeros = 0; // -1 has no zeros + unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); + MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); + } + + KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); +} +/// ComputeMaskedBits - Determine which of the bits are known to be either zero +/// or one and return them in the KnownZero/KnownOne bit sets. +/// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing /// it to be an explicit zero. If we don't change it to zero, other code could @@ -54,67 +222,75 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { /// /// This function is defined on values with integer type, values with pointer /// type (but only if TD is non-null), and vectors of integers.
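Here is a standalone model of the single-range case of computeMaskedBitsLoad added above (plain C++ using GCC/Clang's __builtin_clz; the [0, 100) bounds stand in for hypothetical !range metadata, and the wrapped-set and multi-range handling is deliberately omitted). The leading zeros of Upper - 1 bound the leading zeros of every value the load can produce:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Lower = 0, Upper = 100; // stand-in for !range {0, 100}
  unsigned MinLeadingZeros = __builtin_clz(Upper - 1); // clz(99) == 25
  std::printf("high %u bits of the loaded value are known zero\n",
              MinLeadingZeros);
  for (uint32_t V = Lower; V < Upper; ++V)
    if (V >> (32 - MinLeadingZeros)) { // check the claimed zero bits
      std::printf("counterexample: %u\n", V);
      return 1;
    }
  return 0;
}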
In the case -/// where V is a vector, the mask, known zero, and known one values are the +/// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, +void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, const TargetData *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) - && "Not integer or pointer type!"); + unsigned BitWidth = KnownZero.getBitWidth(); + + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer or pointer type!"); assert((!TD || TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && + KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! - KnownOne = CI->getValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = CI->getValue(); + KnownZero = ~KnownOne; return; } // Null and aggregate-zero are all-zeros. if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { KnownOne.clearAllBits(); - KnownZero = Mask; + KnownZero = APInt::getAllOnesValue(BitWidth); return; } // Handle a constant vector by taking the intersection of the known bits of - // each element. - if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + // each element. There is no real need to handle ConstantVector here, because + // we don't handle undef in any particularly useful way. + if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. KnownZero.setAllBits(); KnownOne.setAllBits(); - for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, - TD, Depth); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; } return; } + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { - Type *ObjectType = GV->getType()->getElementType(); - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment.
Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + Align = TD->getPreferredAlignment(GVar); + else + Align = TD->getABITypeAlignment(ObjectType); + } + } } if (Align > 0) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); else KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -126,8 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (GA->mayBeOverridden()) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { - ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, - TD, Depth+1); + ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); } return; } @@ -136,15 +311,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Get alignment information off byval arguments if specified in the IR. if (A->hasByValAttr()) if (unsigned Align = A->getParamAlignment()) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); return; } // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (Depth == MaxDepth || Mask == 0) + if (Depth == MaxDepth) return; // Limit search depth. Operator *I = dyn_cast<Operator>(V); @@ -153,12 +328,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt KnownZero2(KnownZero), KnownOne2(KnownOne); switch (I->getOpcode()) { default: break; + case Instruction::Load: + if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) + computeMaskedBitsLoad(*MD, KnownZero); + return; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. 
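Before the And body continues below, the combination rule it implements can be seen in isolation (standalone C++; the 4-bit example patterns are invented). A result bit of A & B is known zero when it is known zero in either operand, and known one only when it is known one in both:

#include <cstdint>
#include <cstdio>

int main() {
  // A = 1?10 and B = ?110, where '?' marks an unknown bit.
  uint8_t KZa = 0x1, KOa = 0xA;  // A: bit 0 known zero; bits 1,3 known one
  uint8_t KZb = 0x1, KOb = 0x6;  // B: bit 0 known zero; bits 1,2 known one
  uint8_t KZ = KZa | KZb;        // zero in either operand => zero in A & B
  uint8_t KO = KOa & KOb;        // one in both operands   => one in A & B
  std::printf("A & B: KnownZero=0x%X KnownOne=0x%X (pattern ??10)\n",
              (unsigned)KZ, (unsigned)KO);
  return 0;
}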
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownZero); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -169,10 +346,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownOne); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -183,9 +358,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -197,55 +371,32 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Mul: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), - BitWidth) - BitWidth; - - TrailZ = std::min(TrailZ, BitWidth); - LeadZ = std::min(LeadZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | - APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; - return; + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
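The udiv approximation just described checks out exhaustively at 8 bits (standalone C++; the lower bound of 16 on the divisor is an arbitrary example): a divisor known to be at least 16 shifts at least 4 leading zero bits into the quotient, because the quotient can be at most 255/16 = 15:

#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned N = 0; N < 256; ++N)
    for (unsigned D = 16; D < 256; ++D)
      if ((N / D) & 0xF0) { // the claimed 4 leading zero bits
        std::printf("counterexample: %u / %u\n", N, D);
        return 1;
      }
  std::printf("8-bit N / D has 4 leading zero bits whenever D >= 16\n");
  return 0;
}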
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -278,11 +429,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, else SrcBitWidth = SrcTy->getScalarSizeInBits(); - APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -296,8 +445,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); return; } break; @@ -306,11 +454,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt MaskIn = Mask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -327,9 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt Mask2(Mask.lshr(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; @@ -344,9 +488,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. 
- APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -362,9 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -378,100 +518,25 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } break; case Instruction::Sub: { - if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (!CLHS->getValue().isNegative()) { - unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, - TD, Depth+1); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - } - } - } + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; } - // fall through case Instruction::Add: { - // If one of the operands has trailing zeros, then the bits that the - // other operand has in those bit positions will be preserved in the - // result. For an add, this works with either operand. For a subtract, - // this only works if the known zeros are in the right operand. - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, - Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); - unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); - - // Determine which operand has more trailing zeros, and use that - // many bits from the other operand. - if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (I->getOpcode() == Instruction::Add) { - APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); - KnownZero |= KnownZero2 & Mask; - KnownOne |= KnownOne2 & Mask; - } else { - // If the known zeros are in the left operand for a subtract, - // fall back to the minimum known zeros in both operands. 
- KnownZero |= APInt::getLowBitsSet(BitWidth, - std::min(LHSKnownZeroOut, - RHSKnownZeroOut)); - } - } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { - APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); - KnownZero |= LHSKnownZero & Mask; - KnownOne |= LHSKnownOne & Mask; - } - - // Are we still trying to solve for the sign bit? - if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ - OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I); - if (OBO->hasNoSignedWrap()) { - if (I->getOpcode() == Instruction::Add) { - // Adding two positive numbers can't wrap into negative - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // and adding two negative numbers can't wrap into positive. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } else { - // Subtracting a negative number from a positive one can't wrap - if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // neither can subtracting a positive number from a negative one. - else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } - } - } - - return; + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -487,19 +552,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (Mask.isNegative() && KnownZero.isNonNegative()) { - APInt Mask2 = APInt::getSignBit(BitWidth); + if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) @@ -512,27 +573,24 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~LowBits; + KnownOne &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
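Both urem facts relied on above hold by brute force at 8 bits (standalone C++; the moduli 16 and 100 are arbitrary examples): a power-of-two remainder is just a low-bit mask, and the result never exceeds the first operand, so its leading zeros survive:

#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    if (X % 16 != (X & 15)) return 1; // urem by 2^k keeps the low k bits
    if ((X % 100) > X) return 1;      // result <= first operand
  }
  std::printf("urem known-bits rules hold for all 8-bit values\n");
  return 0;
}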
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, - TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, - TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } @@ -543,17 +601,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalMask = APInt::getAllOnesValue(BitWidth); APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -573,17 +629,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (!IndexedTy->isSized()) return; unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; - LocalMask = APInt::getAllOnesValue(GEPOpiBits); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(CountTrailingZeros_64(TypeSize) + LocalKnownZero.countTrailingOnes())); } } - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); break; } case Instruction::PHI: { @@ -618,17 +672,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); - Mask2 = APInt::getLowBitsSet(BitWidth, - KnownZero2.countTrailingOnes()); + ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); - KnownZero = Mask & - APInt::getLowBitsSet(BitWidth, + KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), KnownZero3.countTrailingOnes())); break; @@ -657,8 +707,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, - KnownZero2, KnownOne2, TD, MaxDepth-1); + ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -673,10 +723,17 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; + // If this call is undefined for 0, the result will be less than 2^n. + if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) + LowBits -= 1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + case Intrinsic::ctpop: { + unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } @@ -687,6 +744,34 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } } break; + case Instruction::ExtractValue: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { + ExtractValueInst *EVI = cast<ExtractValueInst>(I); + if (EVI->getNumIndices() != 1) break; + if (EVI->getIndices()[0] == 0) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + ComputeMaskedBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + ComputeMaskedBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); + break; + } + } + } } } @@ -702,8 +787,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD, - Depth); + ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -712,10 +796,15 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) - return CI->getValue().isPowerOf2(); - // TODO: Handle vector constants. +bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, + unsigned Depth) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (C->isNullValue()) + return OrZero; + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + } // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. @@ -731,21 +820,36 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { if (Depth++ == MaxDepth) return false; + Value *X = 0, *Y = 0; + // A shift of a power of two is a power of two or zero. 
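The OrZero relaxation is what makes the shift cases added next valid; a standalone check (plain C++, starting from the power of two 16, chosen arbitrarily) shows that shifting a power of two in either direction yields a power of two or zero:

#include <cstdint>
#include <cstdio>

static bool isPow2OrZero(uint8_t V) { return (V & (V - 1)) == 0; }

int main() {
  for (unsigned S = 0; S < 8; ++S) {
    if (!isPow2OrZero((uint8_t)(16u << S))) return 1; // may shift to zero
    if (!isPow2OrZero((uint8_t)(16u >> S))) return 1; // may shift to zero
  }
  std::printf("shifts of 16 stay a power of two or become zero\n");
  return 0;
}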
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || + match(V, m_Shr(m_Value(X), m_Value())))) + return isPowerOfTwo(X, TD, /*OrZero*/true, Depth); + if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isPowerOfTwo(ZI->getOperand(0), TD, Depth); + return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); if (SelectInst *SI = dyn_cast<SelectInst>(V)) - return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && - isPowerOfTwo(SI->getFalseValue(), TD, Depth); + return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && + isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); + + if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { + // A power of two and'd with anything is a power of two or zero. + if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || + isPowerOfTwo(Y, TD, /*OrZero*/true, Depth)) + return true; + // X & (-X) is always a power of two or zero. + if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) + return true; + return false; + } // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). - if (match(V, m_LShr(m_Value(), m_Value())) || - match(V, m_UDiv(m_Value(), m_Value()))) { - PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V); - if (PEO->isExact()) - return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || + match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { + return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth); } return false; @@ -767,7 +871,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxDepth) + if (Depth++ >= MaxDepth) return false; unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -785,13 +889,13 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. - BinaryOperator *BO = cast<BinaryOperator>(V); + OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, TD, Depth); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -799,7 +903,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { // shr exact can only shift out zero bits. - BinaryOperator *BO = cast<BinaryOperator>(V); + PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); if (BO->isExact()) return isKnownNonZero(X, TD, Depth); @@ -809,10 +913,8 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { return true; } // div exact can only produce a zero if the dividend is zero. - else if (match(V, m_IDiv(m_Value(X), m_Value()))) { - BinaryOperator *BO = cast<BinaryOperator>(V); - if (BO->isExact()) - return isKnownNonZero(X, TD, Depth); + else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { + return isKnownNonZero(X, TD, Depth); } // X + Y. 
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { @@ -835,20 +937,29 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth)) + if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth)) return true; - if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) + if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) + return true; + } + // X * Y. + else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && + isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. @@ -861,8 +972,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, - TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -878,7 +988,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const TargetData *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -917,30 +1027,28 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; - case Instruction::AShr: + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - // ashr X, C -> adds C sign bits. - if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { - Tmp += C->getZExtValue(); + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + Tmp += ShAmt->getZExtValue(); if (Tmp > TyBits) Tmp = TyBits; } - // vector ashr X, <C, C, C, C> -> adds C sign bits - if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) { - if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) { - Tmp += CI->getZExtValue(); - if (Tmp > TyBits) Tmp = TyBits; - } - } return Tmp; - case Instruction::Shl: - if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { + } + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. 
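The two shift rules for ComputeNumSignBits can be watched directly at 8 bits (standalone C++; signBits is a helper written for this sketch, and the arithmetic >> on a negative int assumes the usual two's-complement behavior of mainstream compilers): ashr adds its shift amount to the sign-bit count, capped at the width, while shl subtracts it:

#include <cstdint>
#include <cstdio>

// Number of leading bits equal to the sign bit, i.e. what
// ComputeNumSignBits reports for an i8 value.
static unsigned signBits(int8_t V) {
  unsigned N = 1;
  while (N < 8 && (((uint8_t)V >> (7 - N)) & 1) == ((uint8_t)V >> 7))
    ++N;
  return N;
}

int main() {
  int8_t X = -3;                                   // 0b11111101: 6 sign bits
  int8_t Ashr = (int8_t)(X >> 2);                  // arithmetic shift right
  int8_t Shl = (int8_t)(uint8_t)((uint8_t)X << 2); // avoids signed-shift UB
  std::printf("x: %u, x ashr 2: %u, x shl 2: %u sign bits\n",
              signBits(X), signBits(Ashr), signBits(Shl)); // 6, 8, 4
  return 0;
}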
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - if (C->getZExtValue() >= TyBits || // Bad shift. - C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + Tmp2 = ShAmt->getZExtValue(); + if (Tmp2 >= TyBits || // Bad shift. + Tmp2 >= Tmp) break; // Shifted all sign bits out. + return Tmp - Tmp2; } break; + } case Instruction::And: case Instruction::Or: case Instruction::Xor: // NOT is handled here. @@ -971,13 +1079,11 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry @@ -998,12 +1104,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, - TD, Depth+1); + ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), @@ -1045,8 +1149,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + APInt Mask; + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1282,23 +1386,21 @@ Value *llvm::isBytewiseValue(Value *V) { } } - // A ConstantArray is splatable if all its members are equal and also - // splatable. - if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) { - if (CA->getNumOperands() == 0) - return 0; - - Value *Val = isBytewiseValue(CA->getOperand(0)); + // A ConstantDataArray/Vector is splatable if all its members are equal and + // also splatable. + if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) { + Value *Elt = CA->getElementAsConstant(0); + Value *Val = isBytewiseValue(Elt); if (!Val) return 0; - for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I) - if (CA->getOperand(I-1) != CA->getOperand(I)) + for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) + if (CA->getElementAsConstant(I) != Elt) return 0; return Val; } - + // Conceptually, we could handle things like: // %a = zext i8 %X to i16 // %b = shl i16 %a, 8 @@ -1395,50 +1497,44 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, Instruction *InsertBefore) { // Nothing to index? 
Just return V then (this is useful at the end of our - // recursion) + // recursion). if (idx_range.empty()) return V; - // We have indices, so V should have an indexable type - assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) - && "Not looking at a struct or array?"); - assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) - && "Invalid indices for type?"); - CompositeType *PTy = cast<CompositeType>(V->getType()); - - if (isa<UndefValue>(V)) - return UndefValue::get(ExtractValueInst::getIndexedType(PTy, - idx_range)); - else if (isa<ConstantAggregateZero>(V)) - return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, - idx_range)); - else if (Constant *C = dyn_cast<Constant>(V)) { - if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) - // Recursively process this constant - return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1), - InsertBefore); - } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { + // We have indices, so V should have an indexable type. + assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && + "Not looking at a struct or array?"); + assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && + "Invalid indices for type?"); + + if (Constant *C = dyn_cast<Constant>(V)) { + C = C->getAggregateElement(idx_range[0]); + if (C == 0) return 0; + return FindInsertedValue(C, idx_range.slice(1), InsertBefore); + } + + if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices const unsigned *req_idx = idx_range.begin(); for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); i != e; ++i, ++req_idx) { if (req_idx == idx_range.end()) { - if (InsertBefore) - // The requested index identifies a part of a nested aggregate. Handle - // this specially. For example, - // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 - // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 - // %C = extractvalue {i32, { i32, i32 } } %B, 1 - // This can be changed into - // %A = insertvalue {i32, i32 } undef, i32 10, 0 - // %C = insertvalue {i32, i32 } %A, i32 11, 1 - // which allows the unused 0,0 element from the nested struct to be - // removed. - return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), - InsertBefore); - else - // We can't handle this without inserting insertvalues + // We can't handle this without inserting insertvalues + if (!InsertBefore) return 0; + + // The requested index identifies a part of a nested aggregate. Handle + // this specially. For example, + // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 + // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 + // %C = extractvalue {i32, { i32, i32 } } %B, 1 + // This can be changed into + // %A = insertvalue {i32, i32 } undef, i32 10, 0 + // %C = insertvalue {i32, i32 } %A, i32 11, 1 + // which allows the unused 0,0 element from the nested struct to be + // removed. + return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), + InsertBefore); } // This insert value inserts something else than what we are looking for. 
@@ -1454,7 +1550,9 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, return FindInsertedValue(I->getInsertedValueOperand(), makeArrayRef(req_idx, idx_range.end()), InsertBefore); - } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { + } + + if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { // If we're extracting a value from an aggregate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. @@ -1486,7 +1584,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const TargetData &TD) { Operator *PtrOp = dyn_cast<Operator>(Ptr); - if (PtrOp == 0) return Ptr; + if (PtrOp == 0 || Ptr->getType()->isVectorTy()) + return Ptr; // Just look through bitcasts. if (PtrOp->getOpcode() == Instruction::BitCast) @@ -1521,34 +1620,19 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// GetConstantStringInfo - This function computes the length of a +/// getConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. -bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, - uint64_t Offset, - bool StopAtNul) { - // If V is NULL then return false; - if (V == NULL) return false; - - // Look through bitcast instructions. - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return false because ConstantArray can't occur - // any other way - const User *GEP = 0; - if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { - GEP = GEPI; - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() == Instruction::BitCast) - return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); - if (CE->getOpcode() != Instruction::GetElementPtr) - return false; - GEP = CE; - } +bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset, bool TrimAtNul) { + assert(V); + + // Look through bitcast instructions and geps. + V = V->stripPointerCasts(); - if (GEP) { + // If the value is a GEP instruction or constant expression, treat it as an + // offset. + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; @@ -1573,51 +1657,48 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, StartIdx = CI->getZExtValue(); else return false; - return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset, - StopAtNul); + return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset); } - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization.
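The new StringRef flow is easy to model outside LLVM (standalone C++; the initializer "hello\0world" and the offset 2 are invented for the example): take the whole array initializer, drop Offset bytes, then trim at the first NUL:

#include <cstdio>
#include <string>

int main() {
  // Hypothetical initializer for: @s = constant [11 x i8] c"hello\00world"
  std::string Init("hello\0world", 11);
  size_t Offset = 2; // e.g. from getelementptr ... i32 0, i64 2
  std::string Str = Init.substr(Offset);  // skip over 'Offset' bytes
  Str = Str.substr(0, Str.find('\0'));    // TrimAtNul: cut at the first NUL
  std::printf("recovered string: \"%s\" (length %zu)\n", Str.c_str(),
              Str.size());                // "llo", length 3
  return 0;
}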
- const GlobalVariable* GV = dyn_cast<GlobalVariable>(V); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(V); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - const Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case - if (isa<ConstantAggregateZero>(GlobalInit)) { + + // Handle the all-zeros case + if (GV->getInitializer()->isNullValue()) { // This is a degenerate case. The initializer is constant zero so the // length of the string must be zero. - Str.clear(); + Str = ""; return true; } // Must be a Constant Array - const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) + const ConstantDataArray *Array = + dyn_cast<ConstantDataArray>(GV->getInitializer()); + if (Array == 0 || !Array->isString()) return false; // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - + uint64_t NumElts = Array->getType()->getArrayNumElements(); + + // Start out with the entire array in the StringRef. + Str = Array->getAsString(); + if (Offset > NumElts) return false; - // Traverse the constant array from 'Offset' which is the place the GEP refers - // to in the array. - Str.reserve(NumElts-Offset); - for (unsigned i = Offset; i != NumElts; ++i) { - const Constant *Elt = Array->getOperand(i); - const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return false; - if (StopAtNul && CI->isZero()) - return true; // we found end of string, success! - Str += (char)CI->getZExtValue(); - } + // Skip over 'offset' bytes. + Str = Str.substr(Offset); - // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. + if (TrimAtNul) { + // Trim off the \0 and anything after it. If the array is not nul + // terminated, we just return the whole end of string. The client may know + // some other way that the string is length-bound. + Str = Str.substr(0, Str.find('\0')); + } return true; } @@ -1629,8 +1710,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { // Look through noop bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - return GetStringLengthH(BCI->getOperand(0), PHIs); + V = V->stripPointerCasts(); // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. @@ -1666,75 +1746,13 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { if (Len1 != Len2) return 0; return Len1; } - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return unknown. - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { - GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() != Instruction::GetElementPtr) - return 0; - GEP = CE; - } else { - return 0; - } - - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return 0; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. 
-  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
-    if (!Idx->isZero())
-      return 0;
-  } else
-    return 0;
-
-  // If the second index isn't a ConstantInt, then this is a variable index
-  // into the array. If this occurs, we can't say anything meaningful about
-  // the string.
-  uint64_t StartIdx = 0;
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
-    StartIdx = CI->getZExtValue();
-  else
-    return 0;
-
-  // The GEP instruction, constant or instruction, must reference a global
-  // variable that is a constant and is initialized. The referenced constant
-  // initializer is the array that we'll use for optimization.
-  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
-  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
-      GV->mayBeOverridden())
+
+  // Otherwise, see if we can read the string.
+  StringRef StrData;
+  if (!getConstantStringInfo(V, StrData))
     return 0;
-  Constant *GlobalInit = GV->getInitializer();
-
-  // Handle the ConstantAggregateZero case, which is a degenerate case. The
-  // initializer is constant zero so the length of the string must be zero.
-  if (isa<ConstantAggregateZero>(GlobalInit))
-    return 1; // Len = 0 offset by 1.
-
-  // Must be a Constant Array
-  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
-    return false;
-
-  // Get the number of elements in the array
-  uint64_t NumElts = Array->getType()->getNumElements();
-
-  // Traverse the constant array from StartIdx (derived above) which is
-  // the place the GEP refers to in the array.
-  for (unsigned i = StartIdx; i != NumElts; ++i) {
-    Constant *Elt = Array->getOperand(i);
-    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
-    if (!CI) // This array isn't suitable, non-int initializer.
-      return 0;
-    if (CI->isZero())
-      return i-StartIdx+1; // We found end of string, success!
-  }
-  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+  return StrData.size()+1;
 }
 
 /// GetStringLength - If we can compute the length of the string pointed to by
@@ -1793,3 +1811,94 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
   }
   return true;
 }
+
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+                                        const TargetData *TD) {
+  const Operator *Inst = dyn_cast<Operator>(V);
+  if (!Inst)
+    return false;
+
+  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+      if (C->canTrap())
+        return false;
+
+  switch (Inst->getOpcode()) {
+  default:
+    return true;
+  case Instruction::UDiv:
+  case Instruction::URem:
+    // x / y is undefined if y == 0, but calculations like x / 3 are safe.
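// Aside (not part of the patch): why "x / 3 is safe". udiv and urem can only
// trap on a zero divisor, so speculation is allowed exactly when the divisor
// is provably non-zero; for a constant divisor, isKnownNonZero reduces to the
// trivial test sketched below (hypothetical helper name, constant operand
// assumed).
//
//   bool isSafeUnsignedDivisor(uint64_t Divisor) { return Divisor != 0; }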
+    return isKnownNonZero(Inst->getOperand(1), TD);
+  case Instruction::SDiv:
+  case Instruction::SRem: {
+    Value *Op = Inst->getOperand(1);
+    // x / y is undefined if y == 0
+    if (!isKnownNonZero(Op, TD))
+      return false;
+    // x / y might be undefined if y == -1
+    unsigned BitWidth = getBitWidth(Op->getType(), TD);
+    if (BitWidth == 0)
+      return false;
+    APInt KnownZero(BitWidth, 0);
+    APInt KnownOne(BitWidth, 0);
+    ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+    return !!KnownZero;
+  }
+  case Instruction::Load: {
+    const LoadInst *LI = cast<LoadInst>(Inst);
+    if (!LI->isUnordered())
+      return false;
+    return LI->getPointerOperand()->isDereferenceablePointer();
+  }
+  case Instruction::Call: {
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      switch (II->getIntrinsicID()) {
+      // These synthetic intrinsics have no side-effects, and just mark
+      // information about their operands.
+      // FIXME: There are other no-op synthetic instructions that potentially
+      // should be considered at least *safe* to speculate...
+      case Intrinsic::dbg_declare:
+      case Intrinsic::dbg_value:
+        return true;
+
+      case Intrinsic::bswap:
+      case Intrinsic::ctlz:
+      case Intrinsic::ctpop:
+      case Intrinsic::cttz:
+      case Intrinsic::objectsize:
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::umul_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        return true;
+      // TODO: some fp intrinsics are marked as having the same error handling
+      // as libm. They're safe to speculate when they won't error.
+      // TODO: are convert_{from,to}_fp16 safe?
+      // TODO: can we list target-specific intrinsics here?
+      default: break;
+      }
+    }
+    return false; // The called function could have undefined behavior or
+                  // side-effects, even if marked readnone nounwind.
+  }
+  case Instruction::VAArg:
+  case Instruction::Alloca:
+  case Instruction::Invoke:
+  case Instruction::PHI:
+  case Instruction::Store:
+  case Instruction::Ret:
+  case Instruction::Br:
+  case Instruction::IndirectBr:
+  case Instruction::Switch:
+  case Instruction::Unreachable:
+  case Instruction::Fence:
+  case Instruction::LandingPad:
+  case Instruction::AtomicRMW:
+  case Instruction::AtomicCmpXchg:
+  case Instruction::Resume:
+    return false; // Misc instructions which have effects
+  }
+}
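The sdiv/srem case above is the subtle one: after isKnownNonZero rules out a
zero divisor, the ComputeMaskedBits query proves the divisor cannot be -1,
because -1 has every bit set and any bit known to be zero excludes it. A
minimal standalone illustration of the condition being established, assuming
a constant divisor (the function name is hypothetical, not from the patch):

  #include <cstdint>

  // Signed division may only be speculated when the divisor can be neither
  // 0 (division by zero traps) nor -1 (INT_MIN / -1 overflows, which is
  // undefined for sdiv/srem).
  bool isSafeSignedDivisor(int64_t Divisor) {
    return Divisor != 0 && Divisor != -1;
  }

The in-tree check is deliberately conservative: it demands a divisor that is
safe for every possible dividend rather than reasoning about whether the
dividend could actually be INT_MIN.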