Diffstat (limited to 'contrib/llvm/lib/Analysis/ValueTracking.cpp')
-rw-r--r-- | contrib/llvm/lib/Analysis/ValueTracking.cpp | 1898
1 file changed, 913 insertions, 985 deletions
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index a83e207..f2b4078 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -18,7 +18,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -36,40 +38,19 @@ #include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <array> #include <cstring> using namespace llvm; using namespace llvm::PatternMatch; const unsigned MaxDepth = 6; -/// Enable an experimental feature to leverage information about dominating -/// conditions to compute known bits. The individual options below control how -/// hard we search. The defaults are chosen to be fairly aggressive. If you -/// run into compile time problems when testing, scale them back and report -/// your findings. -static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", - cl::Hidden, cl::init(false)); - -// This is expensive, so we only do it for the top level query value. -// (TODO: evaluate cost vs profit, consider higher thresholds) -static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth", - cl::Hidden, cl::init(1)); - -/// How many dominating blocks should be scanned looking for dominating -/// conditions? -static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", - cl::Hidden, - cl::init(20)); - // Controls the number of uses of the value searched for possible // dominating comparisons. static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); -// If true, don't consider only compares whose only use is a branch. -static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", - cl::Hidden, cl::init(false)); - /// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -79,34 +60,45 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { return DL.getPointerTypeSizeInBits(Ty); } -// Many of these functions have internal versions that take an assumption -// exclusion set. This is because of the potential for mutual recursion to -// cause computeKnownBits to repeatedly visit the same assume intrinsic. The -// classic case of this is assume(x = y), which will attempt to determine -// bits in x from bits in y, which will attempt to determine bits in y from -// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call -// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and -// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on. -typedef SmallPtrSet<const Value *, 8> ExclInvsSet; - namespace { // Simplifying using an assume can only be done in a particular control-flow // context (the context instruction provides that context). If an assume and // the context instruction are not in the same block then the DT helps in // figuring out if we can use it. 
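// Illustrative sketch (not part of the patch): the fixed-capacity exclusion
// list that the new Query below carries instead of a SmallPtrSet. Its size is
// bounded by the recursion limit (MaxDepth == 6), so a plain array plus a
// linear std::find is enough. The names here are hypothetical.
#include <algorithm>
#include <array>
#include <cassert>

struct ExclusionList {
  std::array<const void *, 6> Excluded{};
  unsigned NumExcluded = 0;

  void exclude(const void *V) {
    assert(NumExcluded < Excluded.size() && "recursion deeper than MaxDepth?");
    Excluded[NumExcluded++] = V;
  }

  bool isExcluded(const void *V) const {
    auto End = Excluded.begin() + NumExcluded;
    return std::find(Excluded.begin(), End, V) != End;
  }
};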
struct Query { - ExclInvsSet ExclInvs; + const DataLayout &DL; AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; - Query(AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr) - : AC(AC), CxtI(CxtI), DT(DT) {} + /// Set of assumptions that should be excluded from further queries. + /// This is because of the potential for mutual recursion to cause + /// computeKnownBits to repeatedly visit the same assume intrinsic. The + /// classic case of this is assume(x = y), which will attempt to determine + /// bits in x from bits in y, which will attempt to determine bits in y from + /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call + /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and + /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so + /// on. + std::array<const Value*, MaxDepth> Excluded; + unsigned NumExcluded; + + Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), NumExcluded(0) {} Query(const Query &Q, const Value *NewExcl) - : ExclInvs(Q.ExclInvs), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT) { - ExclInvs.insert(NewExcl); + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) { + Excluded = Q.Excluded; + Excluded[NumExcluded++] = NewExcl; + assert(NumExcluded <= Excluded.size()); + } + + bool isExcluded(const Value *Value) const { + if (NumExcluded == 0) + return false; + auto End = Excluded.begin() + NumExcluded; + return std::find(Excluded.begin(), End, Value) != End; } }; } // end anonymous namespace @@ -128,15 +120,14 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { } static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout &DL, unsigned Depth, - const Query &Q); + unsigned Depth, const Query &Q); void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, - Query(AC, safeCxtI(V, CxtI), DT)); + ::computeKnownBits(V, KnownZero, KnownOne, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT)); } bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, @@ -155,35 +146,33 @@ bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, } static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, - const Query &Q); + unsigned Depth, const Query &Q); void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, - Query(AC, safeCxtI(V, CxtI), DT)); + ::ComputeSignBit(V, KnownZero, KnownOne, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT)); } static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q, const DataLayout &DL); + const Query &Q); bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - Query(AC, safeCxtI(V, CxtI), DT), DL); + Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, - const Query &Q); +static 
bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); + return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, @@ -194,42 +183,59 @@ bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, return NonNegative; } -static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, - const Query &Q); +bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + if (auto *CI = dyn_cast<ConstantInt>(V)) + return CI->getValue().isStrictlyPositive(); + + // TODO: We'd doing two recursive queries here. We should factor this such + // that only a single query is needed. + return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) && + isKnownNonZero(V, DL, Depth, AC, CxtI, DT); +} + +bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + bool NonNegative, Negative; + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); + return Negative; +} + +static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q); bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::isKnownNonEqual(V1, V2, DL, Query(AC, - safeCxtI(V1, safeCxtI(V2, CxtI)), - DT)); + return ::isKnownNonEqual(V1, V2, Query(DL, AC, + safeCxtI(V1, safeCxtI(V2, CxtI)), + DT)); } -static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, - unsigned Depth, const Query &Q); +static bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth, + const Query &Q); bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::MaskedValueIsZero(V, Mask, DL, Depth, - Query(AC, safeCxtI(V, CxtI), DT)); + return ::MaskedValueIsZero(V, Mask, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT)); } -static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, - unsigned Depth, const Query &Q); +static unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q); unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); + return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout &DL, unsigned Depth, - const Query &Q) { + unsigned Depth, const Query &Q) { if (!Add) { if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -240,7 +246,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); 
+ computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -259,8 +265,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. APInt CarryIn(BitWidth, 0); @@ -308,11 +314,10 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout &DL, unsigned Depth, - const Query &Q) { + unsigned Depth, const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -333,9 +338,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, // negative or zero. if (!isKnownNonNegative) isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && - isKnownNonZero(Op0, DL, Depth, Q)) || + isKnownNonZero(Op0, Depth, Q)) || (isKnownNegativeOp0 && isKnownNonNegativeOp1 && - isKnownNonZero(Op1, DL, Depth, Q)); + isKnownNonZero(Op1, Depth, Q)); } } @@ -451,7 +456,8 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { return false; } -static bool isValidAssumeForContext(Value *V, const Query &Q) { +static bool isValidAssumeForContext(Value *V, const Instruction *CxtI, + const DominatorTree *DT) { Instruction *Inv = cast<Instruction>(V); // There are two restrictions on the use of an assume: @@ -462,43 +468,43 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { // feeding the assume is trivially true, thus causing the removal of // the assume). - if (Q.DT) { - if (Q.DT->dominates(Inv, Q.CxtI)) { + if (DT) { + if (DT->dominates(Inv, CxtI)) { return true; - } else if (Inv->getParent() == Q.CxtI->getParent()) { + } else if (Inv->getParent() == CxtI->getParent()) { // The context comes first, but they're both in the same block. Make sure // there is nothing in between that might interrupt the control flow. for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(Q.CxtI)), + std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); I != IE; ++I) if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; - return !isEphemeralValueOf(Inv, Q.CxtI); + return !isEphemeralValueOf(Inv, CxtI); } return false; } // When we don't have a DT, we do a limited search... 
- if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) { + if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { return true; - } else if (Inv->getParent() == Q.CxtI->getParent()) { + } else if (Inv->getParent() == CxtI->getParent()) { // Search forward from the assume until we reach the context (or the end // of the block); the common case is that the assume will come first. for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) - if (&*I == Q.CxtI) + if (&*I == CxtI) return true; // The context must come first... for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(Q.CxtI)), + std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); I != IE; ++I) if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; - return !isEphemeralValueOf(Inv, Q.CxtI); + return !isEphemeralValueOf(Inv, CxtI); } return false; @@ -507,226 +513,12 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { bool llvm::isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT) { - return ::isValidAssumeForContext(const_cast<Instruction *>(I), - Query(nullptr, CxtI, DT)); -} - -template<typename LHS, typename RHS> -inline match_combine_or<CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>, - CmpClass_match<RHS, LHS, ICmpInst, ICmpInst::Predicate>> -m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L)); -} - -template<typename LHS, typename RHS> -inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::And>, - BinaryOp_match<RHS, LHS, Instruction::And>> -m_c_And(const LHS &L, const RHS &R) { - return m_CombineOr(m_And(L, R), m_And(R, L)); -} - -template<typename LHS, typename RHS> -inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Or>, - BinaryOp_match<RHS, LHS, Instruction::Or>> -m_c_Or(const LHS &L, const RHS &R) { - return m_CombineOr(m_Or(L, R), m_Or(R, L)); -} - -template<typename LHS, typename RHS> -inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Xor>, - BinaryOp_match<RHS, LHS, Instruction::Xor>> -m_c_Xor(const LHS &L, const RHS &R) { - return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); -} - -/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is -/// true (at the context instruction.) This is mostly a utility function for -/// the prototype dominating conditions reasoning below. -static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp, - APInt &KnownZero, - APInt &KnownOne, - const DataLayout &DL, - unsigned Depth, const Query &Q) { - Value *LHS = Cmp->getOperand(0); - Value *RHS = Cmp->getOperand(1); - // TODO: We could potentially be more aggressive here. This would be worth - // evaluating. If we can, explore commoning this code with the assume - // handling logic. - if (LHS != V && RHS != V) - return; - - const unsigned BitWidth = KnownZero.getBitWidth(); - - switch (Cmp->getPredicate()) { - default: - // We know nothing from this condition - break; - // TODO: implement unsigned bound from below (known one bits) - // TODO: common condition check implementations with assumes - // TODO: implement other patterns from assume (e.g. 
V & B == A) - case ICmpInst::ICMP_SGT: - if (LHS == V) { - APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); - computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); - if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) { - // We know that the sign bit is zero. - KnownZero |= APInt::getSignBit(BitWidth); - } - } - break; - case ICmpInst::ICMP_EQ: - { - APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); - if (LHS == V) - computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); - else if (RHS == V) - computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); - else - llvm_unreachable("missing use?"); - KnownZero |= KnownZeroTemp; - KnownOne |= KnownOneTemp; - } - break; - case ICmpInst::ICMP_ULE: - if (LHS == V) { - APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); - computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); - // The known zero bits carry over - unsigned SignBits = KnownZeroTemp.countLeadingOnes(); - KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); - } - break; - case ICmpInst::ICMP_ULT: - if (LHS == V) { - APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); - computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); - // Whatever high bits in rhs are zero are known to be zero (if rhs is a - // power of 2, then one more). - unsigned SignBits = KnownZeroTemp.countLeadingOnes(); - if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL)) - SignBits++; - KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits); - } - break; - }; -} - -/// Compute known bits in 'V' from conditions which are known to be true along -/// all paths leading to the context instruction. In particular, look for -/// cases where one branch of an interesting condition dominates the context -/// instruction. This does not do general dataflow. -/// NOTE: This code is EXPERIMENTAL and currently off by default. -static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, - APInt &KnownOne, - const DataLayout &DL, - unsigned Depth, - const Query &Q) { - // Need both the dominator tree and the query location to do anything useful - if (!Q.DT || !Q.CxtI) - return; - Instruction *Cxt = const_cast<Instruction *>(Q.CxtI); - // The context instruction might be in a statically unreachable block. If - // so, asking dominator queries may yield suprising results. (e.g. the block - // may not have a dom tree node) - if (!Q.DT->isReachableFromEntry(Cxt->getParent())) - return; - - // Avoid useless work - if (auto VI = dyn_cast<Instruction>(V)) - if (VI->getParent() == Cxt->getParent()) - return; - - // Note: We currently implement two options. It's not clear which of these - // will survive long term, we need data for that. - // Option 1 - Try walking the dominator tree looking for conditions which - // might apply. This works well for local conditions (loop guards, etc..), - // but not as well for things far from the context instruction (presuming a - // low max blocks explored). If we can set an high enough limit, this would - // be all we need. - // Option 2 - We restrict out search to those conditions which are uses of - // the value we're interested in. This is independent of dom structure, - // but is slightly less powerful without looking through lots of use chains. - // It does handle conditions far from the context instruction (e.g. early - // function exits on entry) really well though. 
- - // Option 1 - Search the dom tree - unsigned NumBlocksExplored = 0; - BasicBlock *Current = Cxt->getParent(); - while (true) { - // Stop searching if we've gone too far up the chain - if (NumBlocksExplored >= DomConditionsMaxDomBlocks) - break; - NumBlocksExplored++; - - if (!Q.DT->getNode(Current)->getIDom()) - break; - Current = Q.DT->getNode(Current)->getIDom()->getBlock(); - if (!Current) - // found function entry - break; - - BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator()); - if (!BI || BI->isUnconditional()) - continue; - ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition()); - if (!Cmp) - continue; - - // We're looking for conditions that are guaranteed to hold at the context - // instruction. Finding a condition where one path dominates the context - // isn't enough because both the true and false cases could merge before - // the context instruction we're actually interested in. Instead, we need - // to ensure that the taken *edge* dominates the context instruction. We - // know that the edge must be reachable since we started from a reachable - // block. - BasicBlock *BB0 = BI->getSuccessor(0); - BasicBlockEdge Edge(BI->getParent(), BB0); - if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) - continue; - - computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, - Q); - } - - // Option 2 - Search the other uses of V - unsigned NumUsesExplored = 0; - for (auto U : V->users()) { - // Avoid massive lists - if (NumUsesExplored >= DomConditionsMaxUses) - break; - NumUsesExplored++; - // Consider only compare instructions uniquely controlling a branch - ICmpInst *Cmp = dyn_cast<ICmpInst>(U); - if (!Cmp) - continue; - - if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) - continue; - - for (auto *CmpU : Cmp->users()) { - BranchInst *BI = dyn_cast<BranchInst>(CmpU); - if (!BI || BI->isUnconditional()) - continue; - // We're looking for conditions that are guaranteed to hold at the - // context instruction. Finding a condition where one path dominates - // the context isn't enough because both the true and false cases could - // merge before the context instruction we're actually interested in. - // Instead, we need to ensure that the taken *edge* dominates the context - // instruction. - BasicBlock *BB0 = BI->getSuccessor(0); - BasicBlockEdge Edge(BI->getParent(), BB0); - if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) - continue; - - computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth, - Q); - } - } + return ::isValidAssumeForContext(const_cast<Instruction *>(I), CxtI, DT); } static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, - APInt &KnownOne, const DataLayout &DL, - unsigned Depth, const Query &Q) { + APInt &KnownOne, unsigned Depth, + const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! if (!Q.AC || !Q.CxtI) @@ -740,7 +532,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, CallInst *I = cast<CallInst>(AssumeVH); assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && "Got assumption for the wrong function!"); - if (Q.ExclInvs.count(I)) + if (Q.isExcluded(I)) continue; // Warning: This loop can end up being somewhat performance sensetive. 
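// Illustrative example for the simplest pattern handled below, assume(v == a):
// whatever bits are known for a become known for v, so with a constant such as
// assume(x == 42) every bit of x becomes known. Plain 32-bit masks stand in
// for the APInts; this is a sketch, not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 42;                 // the right-hand side of the assume
  uint32_t RHSKnownOne = A;              // a constant has every bit known
  uint32_t RHSKnownZero = ~A;
  uint32_t KnownZero = 0, KnownOne = 0;  // nothing known about x beforehand
  KnownZero |= RHSKnownZero;             // the propagation rule: a's bits flow to x
  KnownOne |= RHSKnownOne;
  assert((KnownZero | KnownOne) == 0xFFFFFFFFu); // x is now fully known
  assert(KnownOne == 42u);
  return 0;
}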
@@ -752,7 +544,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, Value *Arg = I->getArgOperand(0); - if (Arg == V && isValidAssumeForContext(I, Q)) { + if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); KnownZero.clearAllBits(); KnownOne.setAllBits(); @@ -772,19 +564,20 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, ConstantInt *C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. @@ -793,11 +586,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. @@ -806,11 +600,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v | b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. 
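// Worked example (illustrative, not part of the patch) of the
// assume(v & b == a) propagation rule described above: with
// assume((v & 0xF0) == 0x30), the bits of v covered by the mask inherit the
// known bits of the right-hand side, while the low nibble of v stays unknown.
// Plain 8-bit masks stand in for the APInts.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t MaskKnownOne = 0xF0;                 // b is the constant 0xF0
  const uint8_t RHSKnownOne = 0x30;                  // a is the constant 0x30
  const uint8_t RHSKnownZero = static_cast<uint8_t>(~RHSKnownOne);
  uint8_t KnownOne = RHSKnownOne & MaskKnownOne;     // bits 5..4 of v are one
  uint8_t KnownZero = RHSKnownZero & MaskKnownOne;   // bits 7..6 of v are zero
  assert((KnownOne & KnownZero) == 0);               // known sets stay disjoint
  assert((KnownOne | KnownZero) == 0xF0);            // bits 3..0 remain unknown
  return 0;
}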
@@ -819,11 +614,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. @@ -832,11 +628,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v ^ b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, @@ -848,11 +645,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are @@ -864,9 +662,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. 
KnownZero |= RHSKnownZero.lshr(C->getZExtValue()); @@ -874,9 +673,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. KnownZero |= RHSKnownOne.lshr(C->getZExtValue()); @@ -886,9 +686,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), m_AShr(m_V, m_ConstantInt(C))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. KnownZero |= RHSKnownZero << C->getZExtValue(); @@ -898,18 +699,20 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, m_LShr(m_V, m_ConstantInt(C)), m_AShr(m_V, m_ConstantInt(C)))), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_EQ && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. KnownZero |= RHSKnownOne << C->getZExtValue(); KnownOne |= RHSKnownZero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_SGE && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownZero.isNegative()) { // We know that the sign bit is zero. @@ -917,9 +720,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_SGT && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { // We know that the sign bit is zero. 
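// Illustrative check (not part of the patch) of the signed-compare reasoning
// above: under assume(v >_s c) with c known to be -1 (all ones) or known
// non-negative, v must be >= 0, so its sign bit is known zero.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t V = -4; V <= 4; ++V)
    if (V > -1)                                        // the assumed condition
      assert((static_cast<uint32_t>(V) >> 31) == 0U);  // sign bit is clear
  return 0;
}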
@@ -927,9 +731,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_SLE && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownOne.isNegative()) { // We know that the sign bit is one. @@ -937,9 +742,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_SLT && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { // We know that the sign bit is one. @@ -947,22 +753,24 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_ULE && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. KnownZero |= APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) { + Pred == ICmpInst::ICMP_ULT && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). 
- if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL)) + if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) KnownZero |= APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); else @@ -984,20 +792,19 @@ template <typename KZFunctor, typename KOFunctor> static void computeKnownBitsFromShiftOperator(Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout &DL, unsigned Depth, const Query &Q, - KZFunctor KZF, KOFunctor KOF) { + unsigned Depth, const Query &Q, KZFunctor KZF, KOFunctor KOF) { unsigned BitWidth = KnownZero.getBitWidth(); if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); KnownZero = KZF(KnownZero, ShiftAmt); KnownOne = KOF(KnownOne, ShiftAmt); return; } - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); // Note: We cannot use KnownZero.getLimitedValue() here, because if // BitWidth > 64 and any upper bits are known, we'll end up returning the @@ -1007,7 +814,8 @@ static void computeKnownBitsFromShiftOperator(Operator *I, // It would be more-clearly correct to use the two temporaries for this // calculation. Reusing the APInts here to prevent unnecessary allocations. - KnownZero.clearAllBits(), KnownOne.clearAllBits(); + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); // If we know the shifter operand is nonzero, we can sometimes infer more // known bits. However this is expensive to compute, so be lazy about it and @@ -1017,12 +825,12 @@ static void computeKnownBitsFromShiftOperator(Operator *I, // Early exit if we can't constrain any well-defined shift amount. if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { @@ -1038,7 +846,7 @@ static void computeKnownBitsFromShiftOperator(Operator *I, if (ShiftAmt == 0) { if (!ShifterOperandIsNonZero.hasValue()) ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (*ShifterOperandIsNonZero) continue; } @@ -1052,13 +860,15 @@ static void computeKnownBitsFromShiftOperator(Operator *I, // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). 
- if ((KnownZero & KnownOne) != 0) - KnownZero.clearAllBits(), KnownOne.clearAllBits(); + if ((KnownZero & KnownOne) != 0) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + } } static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, - APInt &KnownOne, const DataLayout &DL, - unsigned Depth, const Query &Q) { + APInt &KnownOne, unsigned Depth, + const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); APInt KnownZero2(KnownZero), KnownOne2(KnownOne); @@ -1070,8 +880,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -1089,15 +899,15 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), m_Value(Y)))) { APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); - computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + computeKnownBits(Y, KnownZero3, KnownOne3, Depth + 1, Q); if (KnownOne3.countTrailingOnes() > 0) KnownZero |= APInt::getLowBitsSet(BitWidth, 1); } break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -1106,8 +916,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -1119,19 +929,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, - KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); + KnownOne, KnownZero2, KnownOne2, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -1141,8 +951,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Instruction::Select: - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; @@ -1166,12 +976,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType()); + SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. 
@@ -1181,12 +991,11 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || - SrcTy->isFloatingPointTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); break; } break; @@ -1197,7 +1006,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1221,8 +1030,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, DL, Depth, Q, - KZF, KOF); + KnownZero2, KnownOne2, Depth, Q, KZF, + KOF); break; } case Instruction::LShr: { @@ -1238,8 +1047,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, DL, Depth, Q, - KZF, KOF); + KnownZero2, KnownOne2, Depth, Q, KZF, + KOF); break; } case Instruction::AShr: { @@ -1253,22 +1062,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, DL, Depth, Q, - KZF, KOF); + KnownZero2, KnownOne2, Depth, Q, KZF, + KOF); break; } case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, DL, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, + Q); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, DL, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, + Q); break; } case Instruction::SRem: @@ -1276,7 +1085,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); // The low bits of the first operand are unchanged by the srem. @@ -1301,8 +1110,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL, - Depth + 1, Q); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + Q); // If it's known zero, our sign bit is also zero. 
if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -1311,11 +1120,10 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; case Instruction::URem: { if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { - APInt RA = Rem->getValue(); + const APInt &RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, - Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -1324,8 +1132,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -1338,7 +1146,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, AllocaInst *AI = cast<AllocaInst>(I); unsigned Align = AI->getAlignment(); if (Align == 0) - Align = DL.getABITypeAlignment(AI->getType()->getElementType()); + Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); @@ -1348,8 +1156,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL, - Depth + 1, Q); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1, + Q); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -1367,7 +1175,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, Index = CIndex->getSplatValue(); unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); - const StructLayout *SL = DL.getStructLayout(STy); + const StructLayout *SL = Q.DL.getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); TrailZ = std::min<unsigned>(TrailZ, countTrailingZeros(Offset)); @@ -1379,10 +1187,9 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); - uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy); + uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1, - Q); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -1424,11 +1231,11 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. 
- computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q); + computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q); + computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -1459,8 +1266,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, KnownZero2, KnownOne2, DL, - MaxDepth - 1, Q); + computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -1473,17 +1279,21 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::Call: case Instruction::Invoke: + // If range metadata is attached to this call, set known bits from that, + // and then intersect with known bits based on other properties of the + // function. if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); - // If a range metadata is attached to this IntrinsicInst, intersect the - // explicit range specified by the metadata and the implicit range of - // the intrinsic. + if (Value *RV = CallSite(I).getReturnedArgOperand()) { + computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q); + KnownZero |= KnownZero2; + KnownOne |= KnownOne2; + } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bswap: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, - Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); KnownZero |= KnownZero2.byteSwap(); KnownOne |= KnownOne2.byteSwap(); break; @@ -1497,8 +1307,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Intrinsic::ctpop: { - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, - Depth + 1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); @@ -1511,12 +1320,6 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, // of bits which might be set provided by popcnt KnownOne2. 
break; } - case Intrinsic::fabs: { - Type *Ty = II->getType(); - APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); - KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); - break; - } case Intrinsic::x86_sse42_crc32_64_64: KnownZero |= APInt::getHighBitsSet(64, 32); break; @@ -1534,19 +1337,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); + KnownOne, KnownZero2, KnownOne2, Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, DL, Depth, Q); + KnownOne, KnownZero2, KnownOne2, Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, - KnownZero, KnownOne, KnownZero2, KnownOne2, DL, - Depth, Q); + KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, + Q); break; } } @@ -1554,46 +1357,6 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } } -static unsigned getAlignment(const Value *V, const DataLayout &DL) { - unsigned Align = 0; - if (auto *GO = dyn_cast<GlobalObject>(V)) { - Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - } else if (const Argument *A = dyn_cast<Argument>(V)) { - Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. - Type *EltTy = cast<PointerType>(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) - Align = AI->getAlignment(); - else if (auto CS = ImmutableCallSite(V)) - Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); - else if (const LoadInst *LI = dyn_cast<LoadInst>(V)) - if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { - ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); - Align = CI->getLimitedValue(); - } - - return Align; -} - /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -1610,16 +1373,15 @@ static unsigned getAlignment(const Value *V, const DataLayout &DL) { /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. 
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout &DL, unsigned Depth, const Query &Q) { + unsigned Depth, const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->isFPOrFPVectorTy() || V->getType()->getScalarType()->isPointerTy()) && - "Not integer, floating point, or pointer type!"); - assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + "Not integer or pointer type!"); + assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && @@ -1633,15 +1395,13 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } // Null and aggregate-zero are all-zeros. - if (isa<ConstantPointerNull>(V) || - isa<ConstantAggregateZero>(V)) { + if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { KnownOne.clearAllBits(); KnownZero = APInt::getAllOnesValue(BitWidth); return; } // Handle a constant vector by taking the intersection of the known bits of - // each element. There is no real need to handle ConstantVector here, because - // we don't handle undef in any particularly useful way. + // each element. if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) { // We know that CDS must be a vector of integers. Take the intersection of // each element. @@ -1655,6 +1415,26 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } + if (auto *CV = dyn_cast<ConstantVector>(V)) { + // We know that CV must be a vector of integers. Take the intersection of + // each element. + KnownZero.setAllBits(); KnownOne.setAllBits(); + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + Constant *Element = CV->getAggregateElement(i); + auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); + if (!ElementCI) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + return; + } + Elt = ElementCI->getValue(); + KnownZero &= ~Elt; + KnownOne &= Elt; + } + return; + } + // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -1666,33 +1446,26 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has // the bits of its aliasee. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (!GA->mayBeOverridden()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); + if (!GA->isInterposable()) + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q); return; } if (Operator *I = dyn_cast<Operator>(V)) - computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q); // Aligned pointers have trailing zeros - refine KnownZero set if (V->getType()->isPointerTy()) { - unsigned Align = getAlignment(V, DL); + unsigned Align = V->getPointerAlignment(Q.DL); if (Align) KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); } - // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition - // strictly refines KnownZero and KnownOne. Therefore, we run them after - // computeKnownBitsFromOperator. + // computeKnownBitsFromAssume strictly refines KnownZero and + // KnownOne. 
Therefore, we run them after computeKnownBitsFromOperator. // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Check whether there's a dominating condition which implies something about - // this value at the given context. - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, - Q); + computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } @@ -1700,8 +1473,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, /// Determine whether the sign bit is known to be zero or one. /// Convenience wrapper around computeKnownBits. void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, const Query &Q) { - unsigned BitWidth = getBitWidth(V->getType(), DL); + unsigned Depth, const Query &Q) { + unsigned BitWidth = getBitWidth(V->getType(), Q.DL); if (!BitWidth) { KnownZero = false; KnownOne = false; @@ -1709,7 +1482,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q); + computeKnownBits(V, ZeroBits, OneBits, Depth, Q); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -1719,13 +1492,14 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q, const DataLayout &DL) { + const Query &Q) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return CI->getValue().isPowerOf2(); - // TODO: Handle vector constants. + + const APInt *ConstIntOrConstSplatInt; + if (match(C, m_APInt(ConstIntOrConstSplatInt))) + return ConstIntOrConstSplatInt->isPowerOf2(); } // 1 << X is clearly a power of two if the one is not shifted off the end. If @@ -1747,19 +1521,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, // or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_LShr(m_Value(X), m_Value())))) - return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); if (SelectInst *SI = dyn_cast<SelectInst>(V)) - return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) && - isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL); + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. - if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) || - isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q)) return true; // X & (-X) is always a power of two or zero. 
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -1774,19 +1548,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) - if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL)) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) return true; if (match(Y, m_And(m_Specific(X), m_Value())) || match(Y, m_And(m_Value(), m_Specific(X)))) - if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL)) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q); + computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1804,7 +1578,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, - Depth, Q, DL); + Depth, Q); } return false; @@ -1816,8 +1590,8 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, /// to be non-null. /// /// Currently this routine does not support vector GEPs. -static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, - unsigned Depth, const Query &Q) { +static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth, + const Query &Q) { if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) return false; @@ -1826,7 +1600,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, // If the base pointer is non-null, we cannot walk to a null address with an // inbounds GEP in address space zero. - if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q)) + if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) return true; // Walk the GEP operands and see if any operand introduces a non-zero offset. @@ -1838,7 +1612,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, if (StructType *STy = dyn_cast<StructType>(*GTI)) { ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = DL.getStructLayout(STy); + const StructLayout *SL = Q.DL.getStructLayout(STy); uint64_t ElementOffset = SL->getElementOffset(ElementIdx); if (ElementOffset > 0) return true; @@ -1846,7 +1620,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, } // If we have a zero-sized type, the index doesn't matter. Keep looping. 
- if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0) + if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0) continue; // Fast path the constant operand case both for efficiency and so we don't @@ -1865,7 +1639,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, if (Depth++ >= MaxDepth) continue; - if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q)) + if (isKnownNonZero(GTI.getOperand(), Depth, Q)) return true; } @@ -1875,8 +1649,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL, /// Does the 'Range' metadata (which must be a valid MD_range operand list) /// ensure that the value it's attached to is never Value? 'RangeType' is /// is the type of the value described by the range. -static bool rangeMetadataExcludesValue(MDNode* Ranges, - const APInt& Value) { +static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) { const unsigned NumRanges = Ranges->getNumOperands() / 2; assert(NumRanges >= 1); for (unsigned i = 0; i < NumRanges; ++i) { @@ -1895,23 +1668,35 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, /// For vectors return true if every element is known to be non-zero when /// defined. Supports values with integer or pointer type and vectors of /// integers. -bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, - const Query &Q) { - if (Constant *C = dyn_cast<Constant>(V)) { +bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) { + if (auto *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return false; if (isa<ConstantInt>(C)) // Must be non-zero due to null test above. return true; - // TODO: Handle vectors + + // For constant vectors, check that all elements are undefined or known + // non-zero to determine that the whole vector is known non-zero. + if (auto *VecTy = dyn_cast<VectorType>(C->getType())) { + for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (!Elt || Elt->isNullValue()) + return false; + if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) + return false; + } + return true; + } + return false; } - if (Instruction* I = dyn_cast<Instruction>(V)) { + if (auto *I = dyn_cast<Instruction>(V)) { if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) { // If the possible ranges don't contain zero, then the value is // definitely non-zero. - if (IntegerType* Ty = dyn_cast<IntegerType>(V->getType())) { + if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { const APInt ZeroValue(Ty->getBitWidth(), 0); if (rangeMetadataExcludesValue(Ranges, ZeroValue)) return true; @@ -1926,22 +1711,22 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, // Check for pointer simplifications. if (V->getType()->isPointerTy()) { if (isKnownNonNull(V)) - return true; + return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - if (isGEPKnownNonNull(GEP, DL, Depth, Q)) + if (isGEPKnownNonNull(GEP, Depth, Q)) return true; } - unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL); + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) - return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q); + return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q); // ext X != 0 if X != 0. 
if (isa<SExtInst>(V) || isa<ZExtInst>(V)) - return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q); + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q); // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. @@ -1949,11 +1734,11 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, // shl nuw can't remove any non-zero bits. OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); if (BO->hasNoUnsignedWrap()) - return isKnownNonZero(X, DL, Depth, Q); + return isKnownNonZero(X, Depth, Q); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, Depth, Q); if (KnownOne[0]) return true; } @@ -1963,10 +1748,10 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, // shr exact can only shift out zero bits. PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); if (BO->isExact()) - return isKnownNonZero(X, DL, Depth, Q); + return isKnownNonZero(X, Depth, Q); bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); if (XKnownNegative) return true; @@ -1976,32 +1761,32 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); - + computeKnownBits(X, KnownZero, KnownOne, Depth, Q); + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? if (KnownZero.countTrailingOnes() >= ShiftVal) - return isKnownNonZero(X, DL, Depth, Q); + return isKnownNonZero(X, Depth, Q); } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { - return isKnownNonZero(X, DL, Depth, Q); + return isKnownNonZero(X, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. if (XKnownNonNegative && YKnownNonNegative) - if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q)) + if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -2012,22 +1797,22 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBits(X, KnownZero, KnownOne, Depth, Q); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. 
- computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBits(Y, KnownZero, KnownOne, Depth, Q); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. if (XKnownNonNegative && - isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL)) + isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) return true; if (YKnownNonNegative && - isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL)) + isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) return true; } // X * Y. @@ -2036,13 +1821,13 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && - isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q)) + isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) && - isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) + if (isKnownNonZero(SI->getTrueValue(), Depth, Q) && + isKnownNonZero(SI->getFalseValue(), Depth, Q)) return true; } // PHI @@ -2064,18 +1849,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, } } } + // Check if all incoming values are non-zero constant. + bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { + return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZeroValue(); + }); + if (AllNonZeroConstants) + return true; } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, Depth, Q); return KnownOne != 0; } /// Return true if V2 == V1 + X, where X is known non-zero. -static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL, - const Query &Q) { +static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) { BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); if (!BO || BO->getOpcode() != Instruction::Add) return false; @@ -2086,18 +1876,17 @@ static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL, Op = BO->getOperand(0); else return false; - return isKnownNonZero(Op, DL, 0, Q); + return isKnownNonZero(Op, 0, Q); } /// Return true if it is known that V1 != V2. -static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, - const Query &Q) { +static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) { if (V1->getType()->isVectorTy() || V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. 
return false; - if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q)) + if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) return true; if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) { @@ -2106,10 +1895,10 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, auto BitWidth = Ty->getBitWidth(); APInt KnownZero1(BitWidth, 0); APInt KnownOne1(BitWidth, 0); - computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q); + computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q); APInt KnownZero2(BitWidth, 0); APInt KnownOne2(BitWidth, 0); - computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q); + computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q); auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); if (OppositeBits.getBoolValue()) @@ -2127,26 +1916,48 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, - unsigned Depth, const Query &Q) { +bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth, + const Query &Q) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); + computeKnownBits(V, KnownZero, KnownOne, Depth, Q); return (KnownZero & Mask) == Mask; } +/// For vector constants, loop over the elements and find the constant with the +/// minimum number of sign bits. Return 0 if the value is not a vector constant +/// or if any element was not analyzed; otherwise, return the count for the +/// element with the minimum number of sign bits. +static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) { + auto *CV = dyn_cast<Constant>(V); + if (!CV || !CV->getType()->isVectorTy()) + return 0; + unsigned MinSignBits = TyBits; + unsigned NumElts = CV->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + // If we find a non-ConstantInt, bail out. + auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); + if (!Elt) + return 0; + + // If the sign bit is 1, flip the bits, so we always count leading zeros. + APInt EltVal = Elt->getValue(); + if (EltVal.isNegative()) + EltVal = ~EltVal; + MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros()); + } + + return MinSignBits; +} /// Return the number of times the sign bit of the register is replicated into /// the other bits. We know that at least 1 bit is always equal to the sign bit /// (itself), but other cases can give us information. For example, immediately /// after an "ashr X, 2", we know that the top 3 bits are all equal to each -/// other, so we return 3. -/// -/// 'Op' must have a scalar integer type. -/// -unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, - const Query &Q) { - unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType()); +/// other, so we return 3. For vectors, return the number of sign bits for the +/// vector element with the mininum number of known sign bits. 
+unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) { + unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2161,7 +1972,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp; + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; case Instruction::SDiv: { const APInt *Denominator; @@ -2173,7 +1984,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, break; // Calculate the incoming numerator bits. - unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // Add floor(log(C)) bits to the numerator bits. return std::min(TyBits, NumBits + Denominator->logBase2()); @@ -2195,7 +2006,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, // Calculate the incoming numerator bits. SRem by a positive constant // can't lower the number of sign bits. unsigned NumrBits = - ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // Calculate the leading sign bit constraints by examining the // denominator. Given that the denominator is positive, there are two @@ -2217,7 +2028,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, } case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // ashr X, C -> adds C sign bits. Vectors too. const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { @@ -2230,7 +2041,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. - Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); Tmp2 = ShAmt->getZExtValue(); if (Tmp2 >= TyBits || // Bad shift. Tmp2 >= Tmp) break; // Shifted all sign bits out. @@ -2242,9 +2053,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, case Instruction::Or: case Instruction::Xor: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -2253,23 +2064,22 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, break; case Instruction::Select: - Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); return std::min(Tmp, Tmp2); case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. 
- Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, - Q); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -2282,20 +2092,19 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, return Tmp; } - Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q); + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp2 == 1) return 1; // Handle NEG. if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, - Q); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -2311,7 +2120,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; @@ -2325,11 +2134,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q); + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q)); + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); } return Tmp; } @@ -2342,26 +2151,25 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. + + // If we can examine all elements of a vector constant successfully, we're + // done (we can't do any better than that). If not, keep trying. + if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) + return VecSignBits; + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask; - computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q); - - if (KnownZero.isNegative()) { // sign bit is 0 - Mask = KnownZero; - } else if (KnownOne.isNegative()) { // sign bit is 1; - Mask = KnownOne; - } else { - // Nothing known. - return FirstAnswer; - } + computeKnownBits(V, KnownZero, KnownOne, Depth, Q); + + // If we know that the sign bit is either zero or one, determine the number of + // identical bits in the top of the input value. 
+ if (KnownZero.isNegative()) + return std::max(FirstAnswer, KnownZero.countLeadingOnes()); - // Okay, we know that the sign bit in Mask is set. Use CLZ to determine - // the number of identical bits in the top of the input value. - Mask = ~Mask; - Mask <<= Mask.getBitWidth()-TyBits; - // Return # leading zeros. We use 'min' here in case Val was zero before - // shifting. We don't want to return '64' as for an i32 "0". - return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); + if (KnownOne.isNegative()) + return std::max(FirstAnswer, KnownOne.countLeadingOnes()); + + // computeKnownBits gave us no extra information about the top bits. + return FirstAnswer; } /// This function computes the integer multiple of Base that equals V. @@ -2484,13 +2292,124 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return false; } +Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, + const TargetLibraryInfo *TLI) { + const Function *F = ICS.getCalledFunction(); + if (!F) + return Intrinsic::not_intrinsic; + + if (F->isIntrinsic()) + return F->getIntrinsicID(); + + if (!TLI) + return Intrinsic::not_intrinsic; + + LibFunc::Func Func; + // We're going to make assumptions on the semantics of the functions, check + // that the target knows that it's available in this environment and it does + // not have local linkage. + if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func)) + return Intrinsic::not_intrinsic; + + if (!ICS.onlyReadsMemory()) + return Intrinsic::not_intrinsic; + + // Otherwise check if we have a call to a function that can be turned into a + // vector intrinsic. + switch (Func) { + default: + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + return Intrinsic::sin; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + return Intrinsic::cos; + case LibFunc::exp: + case LibFunc::expf: + case LibFunc::expl: + return Intrinsic::exp; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + return Intrinsic::exp2; + case LibFunc::log: + case LibFunc::logf: + case LibFunc::logl: + return Intrinsic::log; + case LibFunc::log10: + case LibFunc::log10f: + case LibFunc::log10l: + return Intrinsic::log10; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + return Intrinsic::log2; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + return Intrinsic::fabs; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + return Intrinsic::minnum; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + return Intrinsic::maxnum; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + return Intrinsic::copysign; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + return Intrinsic::floor; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + return Intrinsic::ceil; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + return Intrinsic::trunc; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + return Intrinsic::rint; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + return Intrinsic::nearbyint; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + return Intrinsic::round; + case LibFunc::pow: + case LibFunc::powf: + case LibFunc::powl: + return Intrinsic::pow; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (ICS->hasNoNaNs()) + return Intrinsic::sqrt; + return 
Intrinsic::not_intrinsic; + } + + return Intrinsic::not_intrinsic; +} + /// Return true if we can prove that the specified FP value is never equal to /// -0.0. /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! /// -bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { +bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); @@ -2518,30 +2437,26 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) return true; - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); + switch (IID) { + default: + break; // sqrt(-0.0) = -0.0, no other negative results are possible. - if (II->getIntrinsicID() == Intrinsic::sqrt) - return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); - - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (const Function *F = CI->getCalledFunction()) { - if (F->isDeclaration()) { - // abs(x) != -0.0 - if (F->getName() == "abs") return true; - // fabs[lf](x) != -0.0 - if (F->getName() == "fabs") return true; - if (F->getName() == "fabsf") return true; - if (F->getName() == "fabsl") return true; - if (F->getName() == "sqrt" || F->getName() == "sqrtf" || - F->getName() == "sqrtl") - return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); - } + case Intrinsic::sqrt: + return CannotBeNegativeZero(CI->getArgOperand(0), TLI, Depth + 1); + // fabs(x) != -0.0 + case Intrinsic::fabs: + return true; } + } return false; } -bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { +bool llvm::CannotBeOrderedLessThanZero(const Value *V, + const TargetLibraryInfo *TLI, + unsigned Depth) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); @@ -2561,52 +2476,53 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { return true; case Instruction::FMul: // x*x is always non-negative or a NaN. - if (I->getOperand(0) == I->getOperand(1)) + if (I->getOperand(0) == I->getOperand(1)) return true; // Fall through case Instruction::FAdd: case Instruction::FDiv: case Instruction::FRem: - return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && - CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && + CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); case Instruction::Select: - return CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1) && - CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1); + return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) && + CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. 
- return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::maxnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) || - CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); - case Intrinsic::minnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && - CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::fabs: - case Intrinsic::sqrt: - return true; - case Intrinsic::powi: - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - // powi(x,n) is non-negative if n is even. - if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) - return true; - } - return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1); - case Intrinsic::fma: - case Intrinsic::fmuladd: - // x*x+y is non-negative if y is non-negative. - return I->getOperand(0) == I->getOperand(1) && - CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1); + return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + case Instruction::Call: + Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI); + switch (IID) { + default: + break; + case Intrinsic::maxnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) || + CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + case Intrinsic::minnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && + CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::fabs: + case Intrinsic::sqrt: + return true; + case Intrinsic::powi: + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + // powi(x,n) is non-negative if n is even. + if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) + return true; } + return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + case Intrinsic::fma: + case Intrinsic::fmuladd: + // x*x+y is non-negative if y is non-negative. + return I->getOperand(0) == I->getOperand(1) && + CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + } break; } - return false; + return false; } /// If the specified value can be set by repeating the same byte in memory, @@ -2863,7 +2779,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) { Ptr = cast<Operator>(Ptr)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { - if (GA->mayBeOverridden()) + if (GA->isInterposable()) break; Ptr = GA->getAliasee(); } else { @@ -2874,6 +2790,24 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, return Ptr; } +bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return false; + + // Make sure the index-ee is a pointer to array of i8. + ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType()); + if (!AT || !AT->getElementType()->isIntegerTy(8)) + return false; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. 
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + if (!FirstIdx || !FirstIdx->isZero()) + return false; + + return true; +} /// This function computes the length of a null-terminated C string pointed to /// by V. If successful, it returns true and returns the string in Str. @@ -2888,20 +2822,9 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // If the value is a GEP instruction or constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return false; - - // Make sure the index-ee is a pointer to array of i8. - PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); - ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (!AT || !AT->getElementType()->isIntegerTy(8)) - return false; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); - if (!FirstIdx || !FirstIdx->isZero()) + // The GEP operator should be based on a pointer to string constant, and is + // indexing into the string constant. + if (!isGEPBasedOnPointerToString(GEP)) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -2923,7 +2846,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - // Handle the all-zeros case + // Handle the all-zeros case. if (GV->getInitializer()->isNullValue()) { // This is a degenerate case. The initializer is constant zero so the // length of the string must be zero. @@ -2931,13 +2854,12 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, return true; } - // Must be a Constant Array - const ConstantDataArray *Array = - dyn_cast<ConstantDataArray>(GV->getInitializer()); + // This must be a ConstantDataArray. + const auto *Array = dyn_cast<ConstantDataArray>(GV->getInitializer()); if (!Array || !Array->isString()) return false; - // Get the number of elements in the array + // Get the number of elements in the array. uint64_t NumElts = Array->getType()->getArrayNumElements(); // Start out with the entire array in the StringRef. @@ -3060,10 +2982,16 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) + if (GA->isInterposable()) return V; V = GA->getAliasee(); } else { + if (auto CS = CallSite(V)) + if (Value *RV = CS.getReturnedArgOperand()) { + V = RV; + continue; + } + // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. 
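For illustration, a minimal sketch of the idea behind the new CallSite handling in GetUnderlyingObject in the hunk above: a call whose callee marks a parameter 'returned' is guaranteed to return that argument, so object stripping can continue through the call. The helper name stripReturnedCall below is hypothetical; only APIs the patch itself already uses (CallSite, getReturnedArgOperand) are assumed.

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// If V is a call site with a 'returned' argument, the call's result is that
// argument, so the underlying object of V is the underlying object of RV.
static Value *stripReturnedCall(Value *V) {
  if (auto CS = CallSite(V))
    if (Value *RV = CS.getReturnedArgOperand())
      return RV;
  return V;
}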
@@ -3133,213 +3061,9 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } -static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset, - Type *Ty, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { - assert(Offset.isNonNegative() && "offset can't be negative"); - assert(Ty->isSized() && "must be sized"); - - APInt DerefBytes(Offset.getBitWidth(), 0); - bool CheckForNonNull = false; - if (const Argument *A = dyn_cast<Argument>(BV)) { - DerefBytes = A->getDereferenceableBytes(); - if (!DerefBytes.getBoolValue()) { - DerefBytes = A->getDereferenceableOrNullBytes(); - CheckForNonNull = true; - } - } else if (auto CS = ImmutableCallSite(BV)) { - DerefBytes = CS.getDereferenceableBytes(0); - if (!DerefBytes.getBoolValue()) { - DerefBytes = CS.getDereferenceableOrNullBytes(0); - CheckForNonNull = true; - } - } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) { - if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) { - ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); - DerefBytes = CI->getLimitedValue(); - } - if (!DerefBytes.getBoolValue()) { - if (MDNode *MD = - LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { - ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); - DerefBytes = CI->getLimitedValue(); - } - CheckForNonNull = true; - } - } - - if (DerefBytes.getBoolValue()) - if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) - if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI)) - return true; - - return false; -} - -static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { - Type *VTy = V->getType(); - Type *Ty = VTy->getPointerElementType(); - if (!Ty->isSized()) - return false; - - APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); - return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); -} - -static bool isAligned(const Value *Base, APInt Offset, unsigned Align, - const DataLayout &DL) { - APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL)); - - if (!BaseAlign) { - Type *Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return false; - BaseAlign = DL.getABITypeAlignment(Ty); - } - - APInt Alignment(Offset.getBitWidth(), Align); - - assert(Alignment.isPowerOf2() && "must be a power of 2!"); - return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); -} - -static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { - Type *Ty = Base->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(Base, Offset, Align, DL); -} - -/// Test if V is always a pointer to allocated and suitably aligned memory for -/// a simple load or store. -static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const DataLayout &DL, - const Instruction *CtxI, const DominatorTree *DT, - const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) { - // Note that it is not safe to speculate into a malloc'd region because - // malloc may return null. - - // These are obviously ok if aligned. - if (isa<AllocaInst>(V)) - return isAligned(V, Align, DL); - - // It's not always safe to follow a bitcast, for example: - // bitcast i8* (alloca i8) to i32* - // would result in a 4-byte load from a 1-byte alloca. 
However, - // if we're casting from a pointer from a type of larger size - // to a type of smaller size (or the same size), and the alignment - // is at least as large as for the resulting pointer type, then - // we can look through the bitcast. - if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) { - Type *STy = BC->getSrcTy()->getPointerElementType(), - *DTy = BC->getDestTy()->getPointerElementType(); - if (STy->isSized() && DTy->isSized() && - (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && - (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, - CtxI, DT, TLI, Visited); - } - - // Global variables which can't collapse to null are ok. - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - if (!GV->hasExternalWeakLinkage()) - return isAligned(V, Align, DL); - - // byval arguments are okay. - if (const Argument *A = dyn_cast<Argument>(V)) - if (A->hasByValAttr()) - return isAligned(V, Align, DL); - - if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) - return isAligned(V, Align, DL); - - // For GEPs, determine if the indexing lands within the allocated object. - if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - Type *VTy = GEP->getType(); - Type *Ty = VTy->getPointerElementType(); - const Value *Base = GEP->getPointerOperand(); - - // Conservatively require that the base pointer be fully dereferenceable - // and aligned. - if (!Visited.insert(Base).second) - return false; - if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, - Visited)) - return false; - - APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); - if (!GEP->accumulateConstantOffset(DL, Offset)) - return false; - - // Check if the load is within the bounds of the underlying object - // and offset is aligned. - uint64_t LoadSize = DL.getTypeStoreSize(Ty); - Type *BaseType = Base->getType()->getPointerElementType(); - assert(isPowerOf2_32(Align) && "must be a power of 2!"); - return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && - !(Offset & APInt(Offset.getBitWidth(), Align-1)); - } - - // For gc.relocate, look through relocations - if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V)) - return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); - - if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, - CtxI, DT, TLI, Visited); - - // If we don't know, assume the worst. - return false; -} - -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, - const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { - // When dereferenceability information is provided by a dereferenceable - // attribute, we know exactly how many bytes are dereferenceable. If we can - // determine the exact offset to the attributed variable, we can use that - // information here. 
- Type *VTy = V->getType(); - Type *Ty = VTy->getPointerElementType(); - - // Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (Ty->isSized()) { - APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); - const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - - if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && - isAligned(BV, Offset, Align, DL)) - return true; - } - - SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, - Visited); -} - -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { - return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); -} - bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { + const DominatorTree *DT) { const Operator *Inst = dyn_cast<Operator>(V); if (!Inst) return false; @@ -3383,15 +3107,13 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const LoadInst *LI = cast<LoadInst>(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeThread) || + LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || // Speculative load may load data from dirty regions. - LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress)) + LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer( - LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); + return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), + LI->getAlignment(), DL, CtxI, DT); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -3416,17 +3138,29 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Intrinsic::umul_with_overflow: case Intrinsic::usub_with_overflow: return true; - // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set - // errno like libm sqrt would. + // These intrinsics are defined to have the same behavior as libm + // functions except for setting errno. case Intrinsic::sqrt: case Intrinsic::fma: case Intrinsic::fmuladd: + return true; + // These intrinsics are defined to have the same behavior as libm + // functions, and the corresponding libm functions never set errno. + case Intrinsic::trunc: + case Intrinsic::copysign: case Intrinsic::fabs: case Intrinsic::minnum: case Intrinsic::maxnum: return true; - // TODO: some fp intrinsics are marked as having the same error handling - // as libm. They're safe to speculate when they won't error. + // These intrinsics are defined to have the same behavior as libm + // functions, which never overflow when operating on the IEEE754 types + // that we support, and never set errno otherwise. + case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::nearbyint: + case Intrinsic::rint: + case Intrinsic::round: + return true; // TODO: are convert_{from,to}_fp16 safe? // TODO: can we list target-specific intrinsics here? default: break; @@ -3464,7 +3198,7 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) { } /// Return true if we know that the specified value is never null. 
-bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { +bool llvm::isKnownNonNull(const Value *V) { assert(V->getType()->isPointerTy() && "V must be pointer type"); // Alloca never returns null, malloc might. @@ -3481,7 +3215,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { return !GV->hasExternalWeakLinkage() && GV->getType()->getAddressSpace() == 0; - // A Load tagged w/nonnull metadata is never null. + // A Load tagged with nonnull metadata is never null. if (const LoadInst *LI = dyn_cast<LoadInst>(V)) return LI->getMetadata(LLVMContext::MD_nonnull); @@ -3498,41 +3232,31 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, assert(V->getType()->isPointerTy() && "V must be pointer type"); unsigned NumUsesExplored = 0; - for (auto U : V->users()) { + for (auto *U : V->users()) { // Avoid massive lists if (NumUsesExplored >= DomConditionsMaxUses) break; NumUsesExplored++; // Consider only compare instructions uniquely controlling a branch - const ICmpInst *Cmp = dyn_cast<ICmpInst>(U); - if (!Cmp) - continue; - - if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + CmpInst::Predicate Pred; + if (!match(const_cast<User *>(U), + m_c_ICmp(Pred, m_Specific(V), m_Zero())) || + (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)) continue; - for (auto *CmpU : Cmp->users()) { - const BranchInst *BI = dyn_cast<BranchInst>(CmpU); - if (!BI) - continue; - - assert(BI->isConditional() && "uses a comparison!"); - - BasicBlock *NonNullSuccessor = nullptr; - CmpInst::Predicate Pred; - - if (match(const_cast<ICmpInst*>(Cmp), - m_c_ICmp(Pred, m_Specific(V), m_Zero()))) { - if (Pred == ICmpInst::ICMP_EQ) - NonNullSuccessor = BI->getSuccessor(1); - else if (Pred == ICmpInst::ICMP_NE) - NonNullSuccessor = BI->getSuccessor(0); - } + for (auto *CmpU : U->users()) { + if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) { + assert(BI->isConditional() && "uses a comparison!"); - if (NonNullSuccessor) { + BasicBlock *NonNullSuccessor = + BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0); BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) return true; + } else if (Pred == ICmpInst::ICMP_NE && + match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) && + DT->dominates(cast<Instruction>(CmpU), CtxI)) { + return true; } } } @@ -3541,8 +3265,8 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, } bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, - const DominatorTree *DT, const TargetLibraryInfo *TLI) { - if (isKnownNonNull(V, TLI)) + const DominatorTree *DT) { + if (isKnownNonNull(V)) return true; return CtxI ? 
::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false; @@ -3671,6 +3395,67 @@ static OverflowResult computeOverflowForSignedAdd( return OverflowResult::MayOverflow; } +bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { +#ifndef NDEBUG + auto IID = II->getIntrinsicID(); + assert((IID == Intrinsic::sadd_with_overflow || + IID == Intrinsic::uadd_with_overflow || + IID == Intrinsic::ssub_with_overflow || + IID == Intrinsic::usub_with_overflow || + IID == Intrinsic::smul_with_overflow || + IID == Intrinsic::umul_with_overflow) && + "Not an overflow intrinsic!"); +#endif + + SmallVector<BranchInst *, 2> GuardingBranches; + SmallVector<ExtractValueInst *, 2> Results; + + for (User *U : II->users()) { + if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); + + if (EVI->getIndices()[0] == 0) + Results.push_back(EVI); + else { + assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); + + for (auto *U : EVI->users()) + if (auto *B = dyn_cast<BranchInst>(U)) { + assert(B->isConditional() && "How else is it using an i1?"); + GuardingBranches.push_back(B); + } + } + } else { + // We are using the aggregate directly in a way we don't want to analyze + // here (storing it to a global, say). + return false; + } + } + + auto AllUsesGuardedByBranch = [&](BranchInst *BI) { + BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); + if (!NoWrapEdge.isSingleEdge()) + return false; + + // Check if all users of the add are provably no-wrap. + for (auto *Result : Results) { + // If the extractvalue itself is not executed on overflow, the we don't + // need to check each use separately, since domination is transitive. + if (DT.dominates(NoWrapEdge, Result->getParent())) + continue; + + for (auto &RU : Result->uses()) + if (!DT.dominates(NoWrapEdge, RU)) + return false; + } + + return true; + }; + + return any_of(GuardingBranches, AllUsesGuardedByBranch); +} + + OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, @@ -3689,16 +3474,46 @@ OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // FIXME: This conservative implementation can be relaxed. E.g. most - // atomic operations are guaranteed to terminate on most platforms - // and most functions terminate. - - return !I->isAtomic() && // atomics may never succeed on some platforms - !isa<CallInst>(I) && // could throw and might not terminate - !isa<InvokeInst>(I) && // might not terminate and could throw to - // non-successor (see bug 24185 for details). - !isa<ResumeInst>(I) && // has no successors - !isa<ReturnInst>(I); // has no successors + // A memory operation returns normally if it isn't volatile. A volatile + // operation is allowed to trap. + // + // An atomic operation isn't guaranteed to return in a reasonable amount of + // time because it's possible for another thread to interfere with it for an + // arbitrary length of time, but programs aren't allowed to rely on that. 
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + return !LI->isVolatile(); + if (const StoreInst *SI = dyn_cast<StoreInst>(I)) + return !SI->isVolatile(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) + return !CXI->isVolatile(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) + return !RMWI->isVolatile(); + if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I)) + return !MII->isVolatile(); + + // If there is no successor, then execution can't transfer to it. + if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) + return !CRI->unwindsToCaller(); + if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) + return !CatchSwitch->unwindsToCaller(); + if (isa<ResumeInst>(I)) + return false; + if (isa<ReturnInst>(I)) + return false; + + // Calls can throw, or contain an infinite loop, or kill the process. + if (CallSite CS = CallSite(const_cast<Instruction*>(I))) { + // Calls which don't write to arbitrary memory are safe. + // FIXME: Ignoring infinite loops without any side-effects is too aggressive, + // but it's consistent with other passes. See http://llvm.org/PR965 . + // FIXME: This isn't aggressive enough; a call which only writes to a + // global is guaranteed to return. + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || + match(I, m_Intrinsic<Intrinsic::assume>()); + } + + // Other instructions return normally. + return true; } bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, @@ -3775,6 +3590,11 @@ bool llvm::propagatesFullPoison(const Instruction *I) { return false; } + case Instruction::ICmp: + // Comparing poison with any value yields poison. This is why, for + // instance, x s< (x +nsw 1) can be folded to true. + return true; + case Instruction::GetElementPtr: // A GEP implicitly represents a sequence of additions, subtractions, // truncations, sign extensions and multiplications. The multiplications @@ -3827,26 +3647,44 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { // Set of instructions that we have proved will yield poison if PoisonI // does. SmallSet<const Value *, 16> YieldsPoison; + SmallSet<const BasicBlock *, 4> Visited; YieldsPoison.insert(PoisonI); + Visited.insert(PoisonI->getParent()); - for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end(); - I != E; ++I) { - if (&*I != PoisonI) { - const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I); - if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; - if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) - return false; + BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end(); + + unsigned Iter = 0; + while (Iter++ < MaxDepth) { + for (auto &I : make_range(Begin, End)) { + if (&I != PoisonI) { + const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I); + if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) + return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + return false; + } + + // Mark poison that propagates from I through uses of I. + if (YieldsPoison.count(&I)) { + for (const User *User : I.users()) { + const Instruction *UserI = cast<Instruction>(User); + if (propagatesFullPoison(UserI)) + YieldsPoison.insert(User); + } + } } - // Mark poison that propagates from I through uses of I. 
- if (YieldsPoison.count(&*I)) { - for (const User *User : I->users()) { - const Instruction *UserI = cast<Instruction>(User); - if (UserI->getParent() == BB && propagatesFullPoison(UserI)) - YieldsPoison.insert(User); + if (auto *NextBB = BB->getSingleSuccessor()) { + if (Visited.insert(NextBB).second) { + BB = NextBB; + Begin = BB->getFirstNonPHI()->getIterator(); + End = BB->end(); + continue; } } - } + + break; + }; return false; } @@ -3979,10 +3817,11 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; } } - + // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C) if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) { - if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() && + if (Pred == ICmpInst::ICMP_SGT && C1->getType() == C2->getType() && + ~C1->getValue() == C2->getValue() && (match(TrueVal, m_Not(m_Specific(CmpLHS))) || match(CmpLHS, m_Not(m_Specific(TrueVal))))) { LHS = TrueVal; @@ -4001,12 +3840,11 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Instruction::CastOps *CastOp) { CastInst *CI = dyn_cast<CastInst>(V1); Constant *C = dyn_cast<Constant>(V2); - CastInst *CI2 = dyn_cast<CastInst>(V2); if (!CI) return nullptr; *CastOp = CI->getOpcode(); - if (CI2) { + if (auto *CI2 = dyn_cast<CastInst>(V2)) { // If V1 and V2 are both the same cast from the same type, we can look // through V1. if (CI2->getOpcode() == CI->getOpcode() && @@ -4017,43 +3855,48 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, return nullptr; } - if (isa<SExtInst>(CI) && CmpI->isSigned()) { - Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy()); - // This is only valid if the truncated value can be sign-extended - // back to the original value. - if (ConstantExpr::getSExt(T, C->getType()) == C) - return T; - return nullptr; - } + Constant *CastedTo = nullptr; + if (isa<ZExtInst>(CI) && CmpI->isUnsigned()) - return ConstantExpr::getTrunc(C, CI->getSrcTy()); + CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy()); + + if (isa<SExtInst>(CI) && CmpI->isSigned()) + CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy(), true); if (isa<TruncInst>(CI)) - return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); + CastedTo = ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); + + if (isa<FPTruncInst>(CI)) + CastedTo = ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); + + if (isa<FPExtInst>(CI)) + CastedTo = ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); if (isa<FPToUIInst>(CI)) - return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); + CastedTo = ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); if (isa<FPToSIInst>(CI)) - return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); + CastedTo = ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); if (isa<UIToFPInst>(CI)) - return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); + CastedTo = ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); if (isa<SIToFPInst>(CI)) - return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); + CastedTo = ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); - if (isa<FPTruncInst>(CI)) - return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); + if (!CastedTo) + return nullptr; - if (isa<FPExtInst>(CI)) - return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); + Constant *CastedBack = + ConstantExpr::getCast(CI->getOpcode(), CastedTo, C->getType(), true); + // Make sure the cast doesn't lose any information. 
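The lookThroughCast rework (the hunk continues just below) now folds the constant through the cast and immediately casts it back, bailing out whenever the round trip loses information; that is what lets matchSelectPattern report a min/max in the compare's type together with the cast a client must re-apply. A sketch of such a client, written under the assumption that *CastOp is the cast to re-apply to the recovered operands (the helper name is hypothetical):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// For IR such as
//   %a.ext = sext i32 %a to i64
//   %c     = icmp slt i32 %a, 42
//   %s     = select i1 %c, i64 %a.ext, i64 42
// matchSelectPattern(%s, ...) should report SPF_SMIN with LHS = %a,
// RHS = i32 42 and *CastOp = Instruction::SExt, so a caller can rebuild the
// value as sext(smin(%a, 42)).
static bool isMinMaxHiddenBehindCast(Value *V) {
  Value *LHS, *RHS;
  Instruction::CastOps CastOp;
  SelectPatternResult SPR = matchSelectPattern(V, LHS, RHS, &CastOp);
  if (!SelectPatternResult::isMinOrMax(SPR.Flavor))
    return false;
  // A cast was peeled iff the reported operands are of a different (narrower)
  // type than the select itself.
  return LHS->getType() != V->getType();
}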
+ if (CastedBack != C) + return nullptr; - return nullptr; + return CastedTo; } -SelectPatternResult llvm::matchSelectPattern(Value *V, - Value *&LHS, Value *&RHS, +SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp) { SelectInst *SI = dyn_cast<SelectInst>(V); if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4172,46 +4015,105 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, } /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred -/// ALHS ARHS" is true. -static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, - Value *ARHS, Value *BLHS, Value *BRHS, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { +/// ALHS ARHS" is true. Otherwise, return None. +static Optional<bool> +isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS, + Value *BLHS, Value *BRHS, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT) { switch (Pred) { default: - return false; + return None; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: - return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, - DT); + if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + return true; + return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, - DT) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, - DT); + if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT)) + return true; + return None; } } -bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { - assert(LHS->getType() == RHS->getType() && "mismatched type"); +/// Return true if the operands of the two compares match. IsSwappedOps is true +/// when the operands match, but are swapped. +static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS, + bool &IsSwappedOps) { + + bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS); + IsSwappedOps = (ALHS == BRHS && ARHS == BLHS); + return IsMatchingOps || IsSwappedOps; +} + +/// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is +/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS +/// BRHS" is false. Otherwise, return None if we can't infer anything. +static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred, + Value *ALHS, Value *ARHS, + CmpInst::Predicate BPred, + Value *BLHS, Value *BRHS, + bool IsSwappedOps) { + // Canonicalize the operands so they're matching. + if (IsSwappedOps) { + std::swap(BLHS, BRHS); + BPred = ICmpInst::getSwappedPredicate(BPred); + } + if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred)) + return true; + if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred)) + return false; + + return None; +} + +/// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is +/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS +/// C2" is false. Otherwise, return None if we can't infer anything. 
+static Optional<bool> +isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS, + ConstantInt *C1, CmpInst::Predicate BPred, + Value *BLHS, ConstantInt *C2) { + assert(ALHS == BLHS && "LHS operands must match."); + ConstantRange DomCR = + ConstantRange::makeExactICmpRegion(APred, C1->getValue()); + ConstantRange CR = + ConstantRange::makeAllowedICmpRegion(BPred, C2->getValue()); + ConstantRange Intersection = DomCR.intersectWith(CR); + ConstantRange Difference = DomCR.difference(CR); + if (Intersection.isEmptySet()) + return false; + if (Difference.isEmptySet()) + return true; + return None; +} + +Optional<bool> llvm::isImpliedCondition(Value *LHS, Value *RHS, + const DataLayout &DL, bool InvertAPred, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. + if (LHS->getType() != RHS->getType()) + return None; + Type *OpTy = LHS->getType(); assert(OpTy->getScalarType()->isIntegerTy(1)); // LHS ==> RHS by definition - if (LHS == RHS) return true; + if (!InvertAPred && LHS == RHS) + return true; if (OpTy->isVectorTy()) // TODO: extending the code below to handle vectors - return false; + return None; assert(OpTy->isIntegerTy(1) && "implied by above"); ICmpInst::Predicate APred, BPred; @@ -4220,11 +4122,37 @@ bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) - return false; + return None; + + if (InvertAPred) + APred = CmpInst::getInversePredicate(APred); + + // Can we infer anything when the two compares have matching operands? + bool IsSwappedOps; + if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) { + if (Optional<bool> Implication = isImpliedCondMatchingOperands( + APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps)) + return Implication; + // No amount of additional analysis will infer the second condition, so + // early exit. + return None; + } + + // Can we infer anything when the LHS operands match and the RHS operands are + // constants (not necessarily matching)? + if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) { + if (Optional<bool> Implication = isImpliedCondMatchingImmOperands( + APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS, + cast<ConstantInt>(BRHS))) + return Implication; + // No amount of additional analysis will infer the second condition, so + // early exit. + return None; + } if (APred == BPred) return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, CxtI, DT); - return false; + return None; } |
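isImpliedCondition now distinguishes three outcomes instead of two: true (the second condition must hold), false (it must not), and None (nothing could be inferred), and the new InvertAPred flag asks the same question about the negation of the first condition. A sketch of a hypothetical caller, e.g. one simplifying a comparison dominated by a branch on KnownTrueCond, with all parameters spelled out explicitly rather than relying on default arguments:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Fold Later to true/false if KnownTrueCond decides it; return nullptr when
// the implication machinery answers None.
static Value *simplifyUsingDominatingCond(Value *KnownTrueCond, ICmpInst *Later,
                                          const DataLayout &DL) {
  Optional<bool> Implied = isImpliedCondition(
      KnownTrueCond, Later, DL, /*InvertAPred=*/false, /*Depth=*/0,
      /*AC=*/nullptr, /*CxtI=*/Later, /*DT=*/nullptr);
  if (!Implied)
    return nullptr;
  return *Implied ? ConstantInt::getTrue(Later->getType())
                  : ConstantInt::getFalse(Later->getType());
}

Passing InvertAPred = true instead asks what follows on the edge where KnownTrueCond is false, which is how a caller can reuse one dominating branch for both of its successors.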