Diffstat (limited to 'contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp')
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 425
1 file changed, 259 insertions, 166 deletions
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c3d2803..43d5c3c 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -37,22 +37,23 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
+
+#define DEBUG_TYPE "basicaa"
+
 using namespace llvm;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
                                           cl::init(false));
-
 /// SearchLimitReached / SearchTimes shows how often the limit of
 /// to decompose GEPs is reached. It will affect the precision
 /// of basic alias analysis.
-#define DEBUG_TYPE "basicaa"
 STATISTIC(SearchLimitReached, "Number of times the limit to "
                               "decompose GEPs is reached");
 STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
 
 /// Cutoff after which to stop analysing a set of phi nodes potentially involved
-/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// in a cycle. Because we are analysing 'through' phi nodes, we need to be
 /// careful with value equivalence. We use reachability to make sure a value
 /// cannot be involved in a cycle.
 const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
@@ -83,7 +84,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
   // inside the function.
   if (const Argument *A = dyn_cast<Argument>(V))
     if (A->hasByValAttr() || A->hasNoAliasAttr())
-      // Note even if the argument is marked nocapture we still need to check
+      // Note even if the argument is marked nocapture, we still need to check
       // for copies made inside the function. The nocapture attribute only
      // specifies that there are no copies made that outlive the function.
       return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
@@ -106,7 +107,7 @@ static bool isEscapeSource(const Value *V) {
   return false;
 }
 
-/// Returns the size of the object specified by V, or UnknownSize if unknown.
+/// Returns the size of the object specified by V or UnknownSize if unknown.
 static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
@@ -173,7 +174,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
 ///
 /// Returns the scale and offset values as APInts and return V as a Value*, and
 /// return whether we looked through any sign or zero extends. The incoming
-/// Value is known to have IntegerType and it may already be sign or zero
+/// Value is known to have IntegerType, and it may already be sign or zero
 /// extended.
 ///
 /// Note that this looks through extends, so the high bits may not be
@@ -192,8 +193,8 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
   }
 
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
-    // if it's a constant, just convert it to an offset and remove the variable.
-    // If we've been called recursively the Offset bit width will be greater
+    // If it's a constant, just convert it to an offset and remove the variable.
+    // If we've been called recursively, the Offset bit width will be greater
     // than the constant's (the Offset's always as wide as the outermost call),
     // so we'll zext here and process any extension in the isa<SExtInst> &
     // isa<ZExtInst> cases below.
@@ -205,8 +206,8 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
 
   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-      // If we've been called recursively then Offset and Scale will be wider
-      // that the BOp operands. We'll always zext it here as we'll process sign
+      // If we've been called recursively, then Offset and Scale will be wider
+      // than the BOp operands. We'll always zext it here as we'll process sign
       // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases).
       APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth());
@@ -319,6 +320,16 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
   return V;
 }
 
+/// To ensure a pointer offset fits in an integer of size PointerSize
+/// (in bits) when that size is smaller than 64. This is an issue in
+/// particular for 32b programs with negative indices that rely on two's
+/// complement wrap-arounds for precise alias information.
+static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
+  assert(PointerSize <= 64 && "Invalid PointerSize!");
+  unsigned ShiftBits = 64 - PointerSize;
+  return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+}
+
 /// If V is a symbolic pointer expression, decompose it into a base pointer
 /// with a constant offset and a number of scaled symbolic offsets.
@@ -332,28 +343,29 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
 /// GetUnderlyingObject and DecomposeGEPExpression must use the same search
 /// depth (MaxLookupSearchDepth). When DataLayout not is around, it just looks
 /// through pointer casts.
-/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
-    const Value *V, int64_t &BaseOffs,
-    SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
-    const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
+bool BasicAAResult::DecomposeGEPExpression(const Value *V,
+    DecomposedGEP &Decomposed, const DataLayout &DL, AssumptionCache *AC,
+    DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
-  MaxLookupReached = false;
   SearchTimes++;
 
-  BaseOffs = 0;
+  Decomposed.StructOffset = 0;
+  Decomposed.OtherOffset = 0;
+  Decomposed.VarIndices.clear();
   do {
     // See if this is a bitcast or GEP.
     const Operator *Op = dyn_cast<Operator>(V);
     if (!Op) {
       // The only non-operator case we can handle are GlobalAliases.
       if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-        if (!GA->mayBeOverridden()) {
+        if (!GA->isInterposable()) {
           V = GA->getAliasee();
           continue;
         }
       }
-      return V;
+      Decomposed.Base = V;
+      return false;
     }
 
     if (Op->getOpcode() == Instruction::BitCast ||
@@ -364,6 +376,12 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
 
     const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
     if (!GEPOp) {
+      if (auto CS = ImmutableCallSite(V))
+        if (const Value *RV = CS.getReturnedArgOperand()) {
+          V = RV;
+          continue;
+        }
+
       // If it's not a GEP, hand it off to SimplifyInstruction to see if it
       // can come up with something. This matches what GetUnderlyingObject does.
       if (const Instruction *I = dyn_cast<Instruction>(V))
@@ -377,16 +395,20 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
         continue;
       }
 
-      return V;
+      Decomposed.Base = V;
+      return false;
     }
 
     // Don't attempt to analyze GEPs over unsized objects.
-    if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
-      return V;
+    if (!GEPOp->getSourceElementType()->isSized()) {
+      Decomposed.Base = V;
+      return false;
+    }
 
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
+    unsigned PointerSize = DL.getPointerSizeInBits(AS);
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I) {
       const Value *Index = *I;
@@ -397,7 +419,8 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
         if (FieldNo == 0)
           continue;
 
-        BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
+        Decomposed.StructOffset +=
+          DL.getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
 
@@ -405,7 +428,8 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-        BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
+        Decomposed.OtherOffset +=
+          DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
         continue;
       }
 
@@ -415,7 +439,6 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      unsigned PointerSize = DL.getPointerSizeInBits(AS);
       if (PointerSize > Width)
         SExtBits += PointerSize - Width;
 
@@ -427,44 +450,48 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
 
      // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getSExtValue() * Scale;
+      Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
       Scale *= IndexScale.getSExtValue();
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it. For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
-      for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
-        if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
-            VarIndices[i].SExtBits == SExtBits) {
-          Scale += VarIndices[i].Scale;
-          VarIndices.erase(VarIndices.begin() + i);
+      for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
+        if (Decomposed.VarIndices[i].V == Index &&
+            Decomposed.VarIndices[i].ZExtBits == ZExtBits &&
+            Decomposed.VarIndices[i].SExtBits == SExtBits) {
+          Scale += Decomposed.VarIndices[i].Scale;
+          Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
           break;
         }
       }
 
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64 - PointerSize) {
-        Scale <<= ShiftBits;
-        Scale = (int64_t)Scale >> ShiftBits;
-      }
+      Scale = adjustToPointerSize(Scale, PointerSize);
 
       if (Scale) {
         VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
                                   static_cast<int64_t>(Scale)};
-        VarIndices.push_back(Entry);
+        Decomposed.VarIndices.push_back(Entry);
       }
     }
 
+    // Take care of wrap-arounds
+    Decomposed.StructOffset =
+      adjustToPointerSize(Decomposed.StructOffset, PointerSize);
+    Decomposed.OtherOffset =
+      adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+
     // Analyze the base pointer next.
     V = GEPOp->getOperand(0);
   } while (--MaxLookup);
 
   // If the chain of expressions is too deep, just return early.
-  MaxLookupReached = true;
+  Decomposed.Base = V;
   SearchLimitReached++;
-  return V;
+  return true;
 }
 
 /// Returns whether the given pointer value points to memory that is local to
@@ -530,22 +557,6 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
   return Worklist.empty();
 }
 
-// FIXME: This code is duplicated with MemoryLocation and should be hoisted to
-// some common utility location.
-static bool isMemsetPattern16(const Function *MS,
-                              const TargetLibraryInfo &TLI) {
-  if (TLI.has(LibFunc::memset_pattern16) &&
-      MS->getName() == "memset_pattern16") {
-    FunctionType *MemsetType = MS->getFunctionType();
-    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
-        isa<PointerType>(MemsetType->getParamType(0)) &&
-        isa<PointerType>(MemsetType->getParamType(1)) &&
-        isa<IntegerType>(MemsetType->getParamType(2)))
-      return true;
-  }
-  return false;
-}
-
 /// Returns the behavior when calling the given call site.
 FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
   if (CS.doesNotAccessMemory())
@@ -558,12 +569,21 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
   // than that.
   if (CS.onlyReadsMemory())
     Min = FMRB_OnlyReadsMemory;
+  else if (CS.doesNotReadMemory())
+    Min = FMRB_DoesNotReadMemory;
 
   if (CS.onlyAccessesArgMemory())
     Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  // The AAResultBase base class has some smarts, lets use them.
-  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+  // If CS has operand bundles then aliasing attributes from the function it
+  // calls do not directly apply to the CallSite. This can be made more
+  // precise in the future.
+  if (!CS.hasOperandBundles())
+    if (const Function *F = CS.getCalledFunction())
+      Min =
+          FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F));
+
+  return Min;
 }
 
 /// Returns the behavior when calling the given function. For use when the call
@@ -578,41 +598,30 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
   // If the function declares it only reads memory, go with that.
   if (F->onlyReadsMemory())
     Min = FMRB_OnlyReadsMemory;
+  else if (F->doesNotReadMemory())
+    Min = FMRB_DoesNotReadMemory;
 
   if (F->onlyAccessesArgMemory())
     Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  // Otherwise be conservative.
-  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+  return Min;
 }
 
-/// Returns true if this is a writeonly (i.e Mod only) parameter. Currently,
-/// we don't have a writeonly attribute, so this only knows about builtin
-/// intrinsics and target library functions. We could consider adding a
-/// writeonly attribute in the future and moving all of these facts to either
-/// Intrinsics.td or InferFunctionAttr.cpp
+/// Returns true if this is a writeonly (i.e Mod only) parameter.
 static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
                              const TargetLibraryInfo &TLI) {
-  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
-    switch (II->getIntrinsicID()) {
-    default:
-      break;
-    case Intrinsic::memset:
-    case Intrinsic::memcpy:
-    case Intrinsic::memmove:
-      // We don't currently have a writeonly attribute. All other properties
-      // of these intrinsics are nicely described via attributes in
-      // Intrinsics.td and handled generically.
-      if (ArgIdx == 0)
-        return true;
-    }
+  if (CS.paramHasAttr(ArgIdx + 1, Attribute::WriteOnly))
+    return true;
 
   // We can bound the aliasing properties of memset_pattern16 just as we can
   // for memcpy/memset. This is particularly important because the
   // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
-  // whenever possible. Note that all but the missing writeonly attribute are
-  // handled via InferFunctionAttr.
-  if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
+  // whenever possible.
+  // FIXME Consider handling this in InferFunctionAttr.cpp together with other
+  // attributes.
+  LibFunc::Func F;
+  if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
+      F == LibFunc::memset_pattern16 && TLI.has(F))
     if (ArgIdx == 0)
       return true;
 
@@ -626,8 +635,7 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
 
 ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
                                            unsigned ArgIdx) {
-  // Emulate the missing writeonly attribute by checking for known builtin
-  // intrinsics and target library functions.
+  // Checking for known builtin intrinsics and target library functions.
   if (isWriteOnlyParam(CS, ArgIdx, TLI))
     return MRI_Mod;
 
@@ -640,9 +648,9 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
   return AAResultBase::getArgModRefInfo(CS, ArgIdx);
 }
 
-static bool isAssumeIntrinsic(ImmutableCallSite CS) {
+static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
-  return II && II->getIntrinsicID() == Intrinsic::assume;
+  return II && II->getIntrinsicID() == IID;
 }
 
 #ifndef NDEBUG
@@ -717,14 +725,14 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
   if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
       isNonEscapingLocalObject(Object)) {
     bool PassedAsArg = false;
-    unsigned ArgNo = 0;
-    for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
-         CI != CE; ++CI, ++ArgNo) {
+    unsigned OperandNo = 0;
+    for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+         CI != CE; ++CI, ++OperandNo) {
       // Only look at the no-capture or byval pointer arguments. If this
       // pointer were passed to arguments that were neither of these, then it
       // couldn't be no-capture.
       if (!(*CI)->getType()->isPointerTy() ||
-          (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+          (!CS.doesNotCapture(OperandNo) && !CS.isByValArgument(OperandNo)))
         continue;
 
       // If this is a no-capture pointer argument, see if we can tell that it
@@ -743,12 +751,36 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
       return MRI_NoModRef;
   }
 
+  // If the CallSite is to malloc or calloc, we can assume that it doesn't
+  // modify any IR visible value. This is only valid because we assume these
+  // routines do not read values visible in the IR. TODO: Consider special
+  // casing realloc and strdup routines which access only their arguments as
+  // well. Or alternatively, replace all of this with inaccessiblememonly once
+  // that's implemented fully.
+  auto *Inst = CS.getInstruction();
+  if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) {
+    // Be conservative if the accessed pointer may alias the allocation -
+    // fallback to the generic handling below.
+    if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
+      return MRI_NoModRef;
+  }
+
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
-  if (isAssumeIntrinsic(CS))
+  if (isIntrinsicCall(CS, Intrinsic::assume))
     return MRI_NoModRef;
 
+  // Like assumes, guard intrinsics are also marked as arbitrarily writing so
+  // that proper control dependencies are maintained but they never mod any
+  // particular memory location.
+  //
+  // *Unlike* assumes, guard intrinsics are modeled as reading memory since the
+  // heap state at the point the guard is issued needs to be consistent in case
+  // the guard invokes the "deopt" continuation.
+  if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
+    return MRI_Ref;
+
   // The AAResultBase base class has some smarts, lets use them.
   return AAResultBase::getModRefInfo(CS, Loc);
 }
@@ -758,9 +790,27 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
-  if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
+  if (isIntrinsicCall(CS1, Intrinsic::assume) ||
+      isIntrinsicCall(CS2, Intrinsic::assume))
     return MRI_NoModRef;
 
+  // Like assumes, guard intrinsics are also marked as arbitrarily writing so
+  // that proper control dependencies are maintained but they never mod any
+  // particular memory location.
+  //
+  // *Unlike* assumes, guard intrinsics are modeled as reading memory since the
+  // heap state at the point the guard is issued needs to be consistent in case
+  // the guard invokes the "deopt" continuation.
+
+  // NB! This function is *not* commutative, so we special case two
+  // possibilities for guard intrinsics.
+
+  if (isIntrinsicCall(CS1, Intrinsic::experimental_guard))
+    return getModRefBehavior(CS2) & MRI_Mod ? MRI_Ref : MRI_NoModRef;
+
+  if (isIntrinsicCall(CS2, Intrinsic::experimental_guard))
+    return getModRefBehavior(CS1) & MRI_Mod ? MRI_Mod : MRI_NoModRef;
+
   // The AAResultBase base class has some smarts, lets use them.
   return AAResultBase::getModRefInfo(CS1, CS2);
 }
@@ -773,7 +823,10 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
                                             uint64_t V2Size,
                                             const DataLayout &DL) {
 
-  assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+  assert(GEP1->getPointerOperand()->stripPointerCasts() ==
+             GEP2->getPointerOperand()->stripPointerCasts() &&
+         GEP1->getPointerOperand()->getType() ==
+             GEP2->getPointerOperand()->getType() &&
          "Expected GEPs with the same pointer operand");
 
   // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
@@ -796,7 +849,7 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
 
   // If the last (struct) indices are constants and are equal, the other indices
   // might be also be dynamically equal, so the GEPs can alias.
-  if (C1 && C2 && C1 == C2)
+  if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
     return MayAlias;
 
   // Find the last-indexed type of the GEP, i.e., the type you'd get if
@@ -895,6 +948,67 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
   return MayAlias;
 }
 
+// If we have (a) a GEP and (b) a pointer based on an alloca, and the
+// beginning of the object the GEP points to would have a negative offset with
+// respect to the alloca, that means the GEP can not alias pointer (b).
+// Note that the pointer based on the alloca may not be a GEP. For
+// example, it may be the alloca itself.
+// The same applies if (b) is based on a GlobalVariable. Note that just being
+// based on isIdentifiedObject() is not enough - we need an identified object
+// that does not permit access to negative offsets. For example, a negative
+// offset from a noalias argument or call can be inbounds w.r.t the actual
+// underlying object.
+//
+// For example, consider:
+//
+//   struct { int f0, int f1, ...} foo;
+//   foo alloca;
+//   foo* random = bar(alloca);
+//   int *f0 = &alloca.f0
+//   int *f1 = &random->f1;
+//
+// Which is lowered, approximately, to:
+//
+//   %alloca = alloca %struct.foo
+//   %random = call %struct.foo* @random(%struct.foo* %alloca)
+//   %f0 = getelementptr inbounds %struct, %struct.foo* %alloca, i32 0, i32 0
+//   %f1 = getelementptr inbounds %struct, %struct.foo* %random, i32 0, i32 1
+//
+// Assume %f1 and %f0 alias. Then %f1 would point into the object allocated
+// by %alloca. Since the %f1 GEP is inbounds, that means %random must also
+// point into the same object. But since %f0 points to the beginning of %alloca,
+// the highest %f1 can be is (%alloca + 3). This means %random can not be higher
+// than (%alloca - 1), and so is not inbounds, a contradiction.
+bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
+      const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
+      uint64_t ObjectAccessSize) {
+  // If the object access size is unknown, or the GEP isn't inbounds, bail.
+  if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
+    return false;
+
+  // We need the object to be an alloca or a globalvariable, and want to know
+  // the offset of the pointer from the object precisely, so no variable
+  // indices are allowed.
+  if (!(isa<AllocaInst>(DecompObject.Base) ||
+        isa<GlobalVariable>(DecompObject.Base)) ||
+      !DecompObject.VarIndices.empty())
+    return false;
+
+  int64_t ObjectBaseOffset = DecompObject.StructOffset +
+                             DecompObject.OtherOffset;
+
+  // If the GEP has no variable indices, we know the precise offset
+  // from the base, then use it. If the GEP has variable indices, we're in
+  // a bit more trouble: we can't count on the constant offsets that come
+  // from non-struct sources, since these can be "rewound" by a negative
+  // variable offset. So use only offsets that came from structs.
+  int64_t GEPBaseOffset = DecompGEP.StructOffset;
+  if (DecompGEP.VarIndices.empty())
+    GEPBaseOffset += DecompGEP.OtherOffset;
+
+  return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+}
+
 /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
 /// another pointer.
 ///
@@ -906,14 +1020,34 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
                                     uint64_t V2Size, const AAMDNodes &V2AAInfo,
                                     const Value *UnderlyingV1,
                                     const Value *UnderlyingV2) {
-  int64_t GEP1BaseOffset;
-  bool GEP1MaxLookupReached;
-  SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
-
+  DecomposedGEP DecompGEP1, DecompGEP2;
+  bool GEP1MaxLookupReached =
+    DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
+  bool GEP2MaxLookupReached =
+    DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
+
+  int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+  int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+
+  assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
+         "DecomposeGEPExpression returned a result different from "
+         "GetUnderlyingObject");
+
+  // If the GEP's offset relative to its base is such that the base would
+  // fall below the start of the object underlying V2, then the GEP and V2
+  // cannot alias.
+  if (!GEP1MaxLookupReached && !GEP2MaxLookupReached &&
+      isGEPBaseAtNegativeOffset(GEP1, DecompGEP1, DecompGEP2, V2Size))
+    return NoAlias;
   // If we have two gep instructions with must-alias or not-alias'ing base
   // pointers, figure out if the indexes to the GEP tell us anything about the
   // derived pointer.
   if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+    // Check for the GEP base being at a negative offset, this time in the other
+    // direction.
+    if (!GEP1MaxLookupReached && !GEP2MaxLookupReached &&
+        isGEPBaseAtNegativeOffset(GEP2, DecompGEP2, DecompGEP1, V1Size))
+      return NoAlias;
     // Do the base pointers alias?
     AliasResult BaseAlias =
         aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
@@ -928,31 +1062,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       if (PreciseBaseAlias == NoAlias) {
         // See if the computed offset from the common pointer tells us about the
         // relation of the resulting pointer.
-        int64_t GEP2BaseOffset;
-        bool GEP2MaxLookupReached;
-        SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
-        const Value *GEP2BasePtr =
-            DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                                   GEP2MaxLookupReached, DL, &AC, DT);
-        const Value *GEP1BasePtr =
-            DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                                   GEP1MaxLookupReached, DL, &AC, DT);
-        // DecomposeGEPExpression and GetUnderlyingObject should return the
-        // same result except when DecomposeGEPExpression has no DataLayout.
-        // FIXME: They always have a DataLayout so this should become an
-        // assert.
-        if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-          return MayAlias;
-        }
         // If the max search depth is reached the result is undefined
         if (GEP2MaxLookupReached || GEP1MaxLookupReached)
           return MayAlias;
 
         // Same offsets.
         if (GEP1BaseOffset == GEP2BaseOffset &&
-            GEP1VariableIndices == GEP2VariableIndices)
+            DecompGEP1.VarIndices == DecompGEP2.VarIndices)
           return NoAlias;
-        GEP1VariableIndices.clear();
       }
     }
 
@@ -964,42 +1081,27 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     // Otherwise, we have a MustAlias. Since the base pointers alias each other
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
-    const Value *GEP1BasePtr =
-        DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, &AC, DT);
-
-    int64_t GEP2BaseOffset;
-    bool GEP2MaxLookupReached;
-    SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
-    const Value *GEP2BasePtr =
-        DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, DL, &AC, DT);
-
-    // DecomposeGEPExpression and GetUnderlyingObject should return the
-    // same result except when DecomposeGEPExpression has no DataLayout.
-    // FIXME: They always have a DataLayout so this should become an assert.
-    if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-      return MayAlias;
-    }
-
     // If we know the two GEPs are based off of the exact same pointer (and not
     // just the same underlying object), see if that tells us anything about
     // the resulting pointers.
-    if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+    if (GEP1->getPointerOperand()->stripPointerCasts() ==
+          GEP2->getPointerOperand()->stripPointerCasts() &&
+        GEP1->getPointerOperand()->getType() ==
+          GEP2->getPointerOperand()->getType()) {
       AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
       // If we couldn't find anything interesting, don't abandon just yet.
       if (R != MayAlias)
         return R;
     }
 
-    // If the max search depth is reached the result is undefined
+    // If the max search depth is reached, the result is undefined
     if (GEP2MaxLookupReached || GEP1MaxLookupReached)
       return MayAlias;
 
     // Subtract the GEP2 pointer from the GEP1 pointer to find out their
     // symbolic difference.
     GEP1BaseOffset -= GEP2BaseOffset;
-    GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+    GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices);
 
   } else {
     // Check to see if these two pointers are related by the getelementptr
@@ -1021,16 +1123,6 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       // with the first operand of the getelementptr".
       return R;
 
-    const Value *GEP1BasePtr =
-        DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, DL, &AC, DT);
-
-    // DecomposeGEPExpression and GetUnderlyingObject should return the
-    // same result except when DecomposeGEPExpression has no DataLayout.
-    // FIXME: They always have a DataLayout so this should become an assert.
-    if (GEP1BasePtr != UnderlyingV1) {
-      return MayAlias;
-    }
     // If the max search depth is reached the result is undefined
     if (GEP1MaxLookupReached)
       return MayAlias;
@@ -1038,18 +1130,18 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
 
   // In the two GEP Case, if there is no difference in the offsets of the
   // computed pointers, the resultant pointers are a must alias. This
-  // hapens when we have two lexically identical GEP's (for example).
+  // happens when we have two lexically identical GEP's (for example).
   //
   // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
   // must aliases the GEP, the end result is a must alias also.
-  if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+  if (GEP1BaseOffset == 0 && DecompGEP1.VarIndices.empty())
    return MustAlias;
 
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping. If the difference is
   // greater, we know they do not overlap.
-  if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+  if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
     if (GEP1BaseOffset >= 0) {
       if (V2Size != MemoryLocation::UnknownSize) {
         if ((uint64_t)GEP1BaseOffset < V2Size)
@@ -1074,22 +1166,22 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     }
   }
 
-  if (!GEP1VariableIndices.empty()) {
+  if (!DecompGEP1.VarIndices.empty()) {
     uint64_t Modulo = 0;
     bool AllPositive = true;
-    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+    for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
 
       // Try to distinguish something like &A[i][1] against &A[42][0].
       // Grab the least significant bit set in any of the scales. We
      // don't need std::abs here (even if the scale's negative) as we'll
       // be ^'ing Modulo with itself later.
-      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+      Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
 
       if (AllPositive) {
         // If the Value could change between cycles, then any reasoning about
         // the Value this cycle may not hold in the next cycle. We'll just
         // give up if we can't determine conditions that hold for every cycle:
-        const Value *V = GEP1VariableIndices[i].V;
+        const Value *V = DecompGEP1.VarIndices[i].V;
 
         bool SignKnownZero, SignKnownOne;
         ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
@@ -1097,14 +1189,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
 
         // Zero-extension widens the variable, and so forces the sign
         // bit to zero.
-        bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+        bool IsZExt = DecompGEP1.VarIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
         SignKnownZero |= IsZExt;
         SignKnownOne &= !IsZExt;
 
         // If the variable begins with a zero then we know it's
         // positive, regardless of whether the value is signed or
         // unsigned.
-        int64_t Scale = GEP1VariableIndices[i].Scale;
+        int64_t Scale = DecompGEP1.VarIndices[i].Scale;
         AllPositive =
             (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
       }
@@ -1127,7 +1219,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
       return NoAlias;
 
-    if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+    if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
                                 GEP1BaseOffset, &AC, DT))
      return NoAlias;
   }
@@ -1312,7 +1404,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
     return NoAlias;
 
   // Are we checking for alias of the same value?
-  // Because we look 'through' phi nodes we could look at "Value" pointers from
+  // Because we look 'through' phi nodes, we could look at "Value" pointers from
  // different iterations. We must therefore make sure that this is not the
   // case. The function isValueEqualInPotentialCycles ensures that this cannot
   // happen by looking at the visited phi nodes and making sure they cannot
@@ -1337,7 +1429,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
     return NoAlias;
 
   if (O1 != O2) {
-    // If V1/V2 point to two different objects we know that we have no alias.
+    // If V1/V2 point to two different objects, we know that we have no alias.
     if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
       return NoAlias;
 
@@ -1430,8 +1522,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
   }
 
   // If both pointers are pointing into the same object and one of them
-  // accesses is accessing the entire object, then the accesses must
-  // overlap in some way.
+  // accesses the entire object, then the accesses must overlap in some way.
   if (O1 == O2)
     if ((V1Size != MemoryLocation::UnknownSize &&
         isObjectSize(O1, V1Size, DL, TLI)) ||
@@ -1544,7 +1635,8 @@ bool BasicAAResult::constantOffsetHeuristic(
   unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
   const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
                                         V0SExtBits, DL, 0, AC, DT, NSW, NUW);
-  NSW = true, NUW = true;
+  NSW = true;
+  NUW = true;
   const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
                                         V1SExtBits, DL, 0, AC, DT, NSW, NUW);
 
@@ -1577,12 +1669,12 @@ bool BasicAAResult::constantOffsetHeuristic(
 
 char BasicAA::PassID;
 
-BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> &AM) {
   return BasicAAResult(F.getParent()->getDataLayout(),
-                       AM->getResult<TargetLibraryAnalysis>(F),
-                       AM->getResult<AssumptionAnalysis>(F),
-                       AM->getCachedResult<DominatorTreeAnalysis>(F),
-                       AM->getCachedResult<LoopAnalysis>(F));
+                       AM.getResult<TargetLibraryAnalysis>(F),
+                       AM.getResult<AssumptionAnalysis>(F),
+                       &AM.getResult<DominatorTreeAnalysis>(F),
+                       AM.getCachedResult<LoopAnalysis>(F));
 }
 
 BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
@@ -1595,6 +1687,7 @@ void BasicAAWrapperPass::anchor() {}
 INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
                       "Basic Alias Analysis (stateless AA impl)", true, true)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
                     "Basic Alias Analysis (stateless AA impl)", true, true)
 
@@ -1606,12 +1699,11 @@ FunctionPass *llvm::createBasicAAWrapperPass() {
 bool BasicAAWrapperPass::runOnFunction(Function &F) {
   auto &ACT = getAnalysis<AssumptionCacheTracker>();
   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
-  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto &DTWP = getAnalysis<DominatorTreeWrapperPass>();
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
 
   Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
-                                 ACT.getAssumptionCache(F),
-                                 DTWP ? &DTWP->getDomTree() : nullptr,
+                                 ACT.getAssumptionCache(F), &DTWP.getDomTree(),
                                  LIWP ? &LIWP->getLoopInfo() : nullptr));
 
   return false;
@@ -1620,6 +1712,7 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
 void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<DominatorTreeWrapperPass>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
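
The adjustToPointerSize helper added by this diff relies on a shift-based sign-extension trick: shifting the low PointerSize bits of a 64-bit offset up to the top and arithmetic-shifting them back reproduces the two's complement wrap-around of a narrower pointer, which is what makes "negative" 32-bit indices come out right. Below is a minimal standalone sketch of that behavior; the function body matches the diff, but the main() harness and printed values are illustrative and not part of the patch.

#include <cassert>
#include <cstdint>
#include <iostream>

// Same body as the helper added in the diff above.
static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
  assert(PointerSize <= 64 && "Invalid PointerSize!");
  unsigned ShiftBits = 64 - PointerSize;
  // Move bit (PointerSize - 1) into bit 63, then arithmetic-shift back down,
  // sign-extending it through the upper bits.
  return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
}

int main() {
  // A "negative" 32-bit index that wrapped: 0xFFFFFFFF is -1 in 32 bits.
  std::cout << adjustToPointerSize(0xFFFFFFFF, 32) << '\n'; // prints -1
  // With 64-bit pointers the offset already fits and is left unchanged.
  std::cout << adjustToPointerSize(0xFFFFFFFF, 64) << '\n'; // prints 4294967295
}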
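
Likewise, the arithmetic behind the new isGEPBaseAtNegativeOffset check can be sketched with simplified stand-in types. The struct below is a hypothetical plain-C++ stand-in, not the LLVM DecomposedGEP class, and the real code additionally bails out on non-inbounds GEPs and unknown access sizes. The idea: if an inbounds GEP sits at or past the end of the other pointer's access, its base would have to start before the underlying alloca or global, so the two cannot alias.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical, simplified stand-in for the DecomposedGEP struct in the diff.
struct DecomposedGEP {
  int64_t StructOffset = 0;    // constant offset contributed by struct fields
  int64_t OtherOffset = 0;     // constant offset from array/pointer indices
  std::vector<int> VarIndices; // variable indices (empty = fully constant)
};

static bool gepBaseAtNegativeOffset(const DecomposedGEP &GEP,
                                    const DecomposedGEP &Object,
                                    int64_t ObjectAccessSize) {
  // The object-side offset must be precise, so no variable indices there.
  if (!Object.VarIndices.empty())
    return false;
  int64_t ObjectBaseOffset = Object.StructOffset + Object.OtherOffset;
  // Struct-field offsets cannot be "rewound" by a negative variable index,
  // so they always count; other constant offsets count only when the GEP
  // has no variable indices left.
  int64_t GEPBaseOffset = GEP.StructOffset;
  if (GEP.VarIndices.empty())
    GEPBaseOffset += GEP.OtherOffset;
  return GEPBaseOffset >= ObjectBaseOffset + ObjectAccessSize;
}

int main() {
  DecomposedGEP GEP{16, 0, {}}; // e.g. an inbounds field GEP at offset 16
  DecomposedGEP Obj{0, 0, {}};  // an 8-byte access at the alloca itself
  std::cout << gepBaseAtNegativeOffset(GEP, Obj, 8) << '\n'; // prints 1
}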