Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/Analysis.cpp                  |   2
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp        |  31
-rw-r--r--  lib/Analysis/ConstantFolding.cpp           |  38
-rw-r--r--  lib/Analysis/IPA/IPA.cpp                   |   1
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp       |   2
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp            |  61
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp  |  29
-rw-r--r--  lib/Analysis/RegionInfo.cpp                |  35
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp           | 105
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp    | 215
10 files changed, 407 insertions(+), 112 deletions(-)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 66e416cd..349c417 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -11,6 +11,8 @@
 #include "llvm-c/Initialization.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassRegistry.h"
 #include <cstring>
 
 using namespace llvm;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index ae6da1a..f8509dd 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
                                 const DataLayout &TD,
                                 const TargetLibraryInfo &TLI) {
+  // Note that the meaning of "object" is slightly different in the
+  // following contexts:
+  //   c1: llvm::getObjectSize()
+  //   c2: llvm.objectsize() intrinsic
+  //   c3: isObjectSmallerThan()
+  // c1 and c2 share the same meaning; however, the meaning of "object" in c3
+  // refers to the "entire object".
+  //
+  // Consider this example:
+  //   char *p = (char*)malloc(100);
+  //   char *q = p+80;
+  //
+  // In the context of c1 and c2, the "object" pointed to by q refers to the
+  // stretch of memory q[0:19]. So, getObjectSize(q) should return 20.
+  //
+  // However, in the context of c3, the "object" refers to the chunk of memory
+  // being allocated. So, the "object" has 100 bytes, and q points to the
+  // middle of the "object". If q is passed to isObjectSmallerThan() as the
+  // 1st parameter, before llvm::getObjectSize() is called to get the size of
+  // the entire object, we should:
+  //   - either rewind the pointer q to the base address of the object in
+  //     question (in this case rewind to p), or
+  //   - just give up. It is up to the caller to make sure the pointer is
+  //     pointing to the base address of the object.
+  //
+  // We go for the 2nd option for simplicity.
+  if (!isIdentifiedObject(V))
+    return false;
+
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
   uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
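The distinction between the c1/c2 and c3 meanings of "object" is easy to see with concrete numbers. A minimal standalone sketch (plain C++, not the LLVM API; values taken from the comment's own p/q example):

#include <cstdint>
#include <cstdio>

int main() {
  // char *p = (char*)malloc(100);  char *q = p + 80;
  const uint64_t WholeObject = 100; // c3: size of the entire allocation
  const uint64_t OffsetOfQ   = 80;  // q points 80 bytes into the object
  // c1/c2 (llvm::getObjectSize / llvm.objectsize): bytes from q to the end.
  const uint64_t FromQ = WholeObject - OffsetOfQ;
  std::printf("size at q = %llu, entire object = %llu\n",
              (unsigned long long)FromQ, (unsigned long long)WholeObject);
  // isObjectSmallerThan() needs WholeObject, so after this patch it gives up
  // unless V is an identified object base (the "2nd option" above).
  return 0;
}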
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 09d7608..bc0dffc 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
   if (Opc == Instruction::And && DL) {
-    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
     APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
     APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
     ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
@@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
 }
 
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
-/// using the specified DataLayout.  If successful, the constant result is
-/// result is returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const DataLayout *TD,
-                                               const TargetLibraryInfo *TLI) {
-  SmallVector<Constant*, 8> Ops;
-  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
-       i != e; ++i) {
+static Constant *
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+                                   const TargetLibraryInfo *TLI,
+                                   SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+  SmallVector<Constant *, 8> Ops;
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
+       ++i) {
     Constant *NewC = cast<Constant>(*i);
-    // Recursively fold the ConstantExpr's operands.
-    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
-      NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
+    // Recursively fold the ConstantExpr's operands. If we have already folded
+    // a ConstantExpr, we don't have to process it again.
+    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+      if (FoldedOps.insert(NewCE))
+        NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+    }
     Ops.push_back(NewC);
   }
 
@@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
   return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
 }
 
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified DataLayout. If successful, the constant result is
+/// returned; if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+                                               const DataLayout *TD,
+                                               const TargetLibraryInfo *TLI) {
+  SmallPtrSet<ConstantExpr *, 4> FoldedOps;
+  return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+}
+
 /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
 /// specified opcode and operands.  If successful, the constant result is
 /// returned, if not, null is returned.  Note that this function can fail when
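The FoldedOps set turns a potentially exponential walk over a DAG of shared ConstantExprs into a linear one. A minimal sketch of the idiom, with std::set standing in for llvm::SmallPtrSet (whose insert() returned a plain bool in this era) and a hypothetical Expr node type:

#include <set>

struct Expr {
  Expr *LHS = nullptr, *RHS = nullptr; // operands may be shared (a DAG)
};

// Visit (and fold) each distinct subexpression exactly once.
static void foldOnce(Expr *E, std::set<Expr *> &Folded) {
  if (!E || !Folded.insert(E).second)
    return; // already processed this node via another use
  foldOnce(E->LHS, Folded);
  foldOnce(E->RHS, Folded);
  // ... fold E itself here ...
}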
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index aa5164e..1c1816d 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 4a3c74e..bf77451 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 // subobject at its beginning) or function, both are pointers to one past the
 // last element of the same array object, or one is a pointer to one past the
 // end of one array object and the other is a pointer to the start of a
-// different array object that happens to immediately follow the first array
+// different array object that happens to immediately follow the first array
 // object in the address space.)
 //
 // C11's version is more restrictive, however there's no reason why an argument
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index d490d54..9c0d8ac 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
   return true;
 }
 
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
-                                   const DataLayout *TD,
-                                   const TargetLibraryInfo *TLI,
-                                   bool RoundToAlign) {
-  if (!TD)
-    return false;
-
-  ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
-  SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
-  if (!Visitor.knownSize(Data))
-    return false;
-
-  Size = Data.first.getZExtValue();
-  return true;
-}
-
 STATISTIC(ObjectVisitorArgument,
           "Number of arguments with unsolved size and offset");
@@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   V = V->stripPointerCasts();
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If we have already seen this instruction, bail out. Cycles can happen in
+    // unreachable code after constant propagation.
+    if (!SeenInsts.insert(I))
+      return unknown();
 
-  if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
-    // Return cached value or insert unknown in cache if size of V was not
-    // computed yet in order to avoid recursions in PHis.
-    std::pair<CacheMapTy::iterator, bool> CacheVal =
-      CacheMap.insert(std::make_pair(V, unknown()));
-    if (!CacheVal.second)
-      return CacheVal.first->second;
-
-    SizeOffsetType Result;
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
-      Result = visitGEPOperator(*GEP);
-    else
-      Result = visit(cast<Instruction>(*V));
-    return CacheMap[V] = Result;
+      return visitGEPOperator(*GEP);
+    return visit(*I);
   }
-
   if (Argument *A = dyn_cast<Argument>(V))
     return visitArgument(*A);
   if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == Instruction::IntToPtr)
       return unknown(); // clueless
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      return visitGEPOperator(cast<GEPOperator>(*CE));
   }
 
   DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
   return unknown();
 }
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
-  if (PHI.getNumIncomingValues() == 0)
-    return unknown();
-
-  SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
-  if (!bothKnown(Ret))
-    return unknown();
-
-  // Verify that all PHI incoming pointers have the same size and offset.
-  for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
-    SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
-    if (!bothKnown(EdgeData) || EdgeData != Ret)
-      return unknown();
-  }
-  return Ret;
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+  // too complex to analyze statically.
+  return unknown();
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
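For orientation, SizeOffsetType here is a (size, offset) pair of APInts: the size of the underlying allocation and the pointer's byte offset into it. A toy model with plain integers (illustrative only, not the visitor's real API):

#include <cstdint>
#include <cstdio>
#include <utility>

using SizeOffset = std::pair<uint64_t, uint64_t>; // (alloc size, ptr offset)

// A GEP-like step just shifts the offset within the same allocation.
static SizeOffset stepGEP(SizeOffset SO, uint64_t ByteOffset) {
  return {SO.first, SO.second + ByteOffset};
}

int main() {
  SizeOffset P{100, 0};          // p = malloc(100)
  SizeOffset Q = stepGEP(P, 80); // q = p + 80
  // llvm::getObjectSize reports the bytes remaining past the pointer.
  std::printf("size=%llu offset=%llu -> bytes left = %llu\n",
              (unsigned long long)Q.first, (unsigned long long)Q.second,
              (unsigned long long)(Q.first - Q.second));
  return 0;
}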
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2240e9d..c0009cb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,9 +47,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
           "Number of block queries that were completely cached");
 
 // Limit for the number of instructions to scan in a block.
-// FIXME: Figure out what a sane value is for this.
-//        (500 is relatively insane.)
-static const int BlockScanLimit = 500;
+static const int BlockScanLimit = 100;
 
 char MemoryDependenceAnalysis::ID = 0;
@@ -913,7 +911,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                             SmallVectorImpl<NonLocalDepResult> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
                             bool SkipFirstBlock) {
-
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -1001,8 +998,17 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I) {
       Visited.insert(std::make_pair(I->getBB(), Addr));
-      if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB()))
+      if (I->getResult().isNonLocal()) {
+        continue;
+      }
+
+      if (!DT) {
+        Result.push_back(NonLocalDepResult(I->getBB(),
+                                           MemDepResult::getUnknown(),
+                                           Addr));
+      } else if (DT->isReachableFromEntry(I->getBB())) {
         Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+      }
     }
     ++NumCacheCompleteNonLocalPtr;
     return false;
@@ -1047,9 +1053,16 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                                               NumSortedEntries);
 
     // If we got a Def or Clobber, add this to the list of results.
-    if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
-      Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
-      continue;
+    if (!Dep.isNonLocal()) {
+      if (!DT) {
+        Result.push_back(NonLocalDepResult(BB,
+                                           MemDepResult::getUnknown(),
+                                           Pointer.getAddr()));
+        continue;
+      } else if (DT->isReachableFromEntry(BB)) {
+        Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+        continue;
+      }
    }
  }
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index fad5074..8577025 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -79,10 +79,43 @@ void Region::replaceExit(BasicBlock *BB) {
   exit = BB;
 }
 
+void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldEntry = getEntry();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceEntry(NewEntry);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getEntry() == OldEntry)
+        RegionQueue.push_back(*RI);
+  }
+}
+
+void Region::replaceExitRecursive(BasicBlock *NewExit) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldExit = getExit();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceExit(NewExit);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getExit() == OldExit)
+        RegionQueue.push_back(*RI);
+  }
+}
+
 bool Region::contains(const BasicBlock *B) const {
   BasicBlock *BB = const_cast<BasicBlock*>(B);
 
-  assert(DT->getNode(BB) && "BB not part of the dominance tree");
+  if (!DT->getNode(BB))
+    return false;
 
   BasicBlock *entry = getEntry(), *exit = getExit();
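The new replace*Recursive helpers descend only into child regions that share the old entry/exit block; siblings with their own entries are untouched. A compact sketch of that worklist shape over a hypothetical region tree (stand-in types, not the LLVM API):

#include <string>
#include <vector>

struct Region {
  std::string Entry;              // stand-in for the entry BasicBlock
  std::vector<Region *> Children;
};

// Update this region and every nested region that shared the old entry.
void replaceEntryRecursive(Region *Root, const std::string &NewEntry) {
  const std::string OldEntry = Root->Entry;
  std::vector<Region *> Queue{Root};
  while (!Queue.empty()) {
    Region *R = Queue.back();
    Queue.pop_back();
    R->Entry = NewEntry;
    for (Region *Child : R->Children)
      if (Child->Entry == OldEntry) // children with a distinct entry keep it
        Queue.push_back(Child);
  }
}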
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 6ea915f..f876748 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3937,10 +3937,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
 /// before taking the branch. For loops with multiple exits, it may not be the
 /// number times that the loop header executes because the loop may exit
 /// prematurely via another branch.
+///
+/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
+/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
+/// loop exits. getExitCount() may return an exact count for this branch
+/// assuming no-signed-wrap. The number of well-defined iterations may actually
+/// be higher than this trip count if this exit test is skipped and the loop
+/// exits via a different branch. Ideally, getExitCount() would know whether it
+/// depends on a NSW assumption, and we would only fall back to a conservative
+/// trip count in that case.
 unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
+getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
   const SCEVConstant *ExitCount =
-    dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+    dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
   if (!ExitCount)
     return 0;
@@ -3967,8 +3976,8 @@ getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
 /// As explained in the comments for getSmallConstantTripCount, this assumes
 /// that control exits the loop via ExitingBlock.
 unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
-  const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) {
+  const SCEV *ExitCount = getBackedgeTakenCount(L);
   if (ExitCount == getCouldNotCompute())
     return 1;
@@ -3997,7 +4006,7 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
 }
 
 // getExitCount - Get the expression for the number of loop iterations for which
-// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// this loop is guaranteed not to exit via ExitingBlock. Otherwise return
 // SCEVCouldNotCompute.
 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
@@ -4382,26 +4391,36 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
   // Proceed to the next level to examine the exit condition expression.
   return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
                                   ExitBr->getSuccessor(0),
-                                  ExitBr->getSuccessor(1));
+                                  ExitBr->getSuccessor(1),
+                                  /*IsSubExpr=*/false);
 }
 
 /// ComputeExitLimitFromCond - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of ExitCond, TBB, and FBB.
+///
+/// @param IsSubExpr is true if ExitCond does not directly control the exit
+/// branch. In this case, we cannot assume that the loop only exits when the
+/// condition is true and cannot infer that failing to meet the condition prior
+/// to integer wraparound results in undefined behavior.
 ScalarEvolution::ExitLimit
 ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                           Value *ExitCond,
                                           BasicBlock *TBB,
-                                          BasicBlock *FBB) {
+                                          BasicBlock *FBB,
+                                          bool IsSubExpr) {
   // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
-      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
-      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+      bool EitherMayExit = L->contains(TBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
-      if (L->contains(TBB)) {
+      if (EitherMayExit) {
         // Both conditions must be true for the loop to continue executing.
         // Choose the less conservative count.
         if (EL0.Exact == getCouldNotCompute() ||
@@ -4429,11 +4448,14 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
-      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
-      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+      bool EitherMayExit = L->contains(FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
-      if (L->contains(FBB)) {
+      if (EitherMayExit) {
         // Both conditions must be false for the loop to continue executing.
         // Choose the less conservative count.
         if (EL0.Exact == getCouldNotCompute() ||
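A concrete instance of the "choose the less conservative count" logic above: when the loop continues only while both subconditions hold, the backedge-taken count is the minimum of the per-condition counts. Minimal standalone check:

#include <cstdio>

int main() {
  unsigned Trips = 0;
  // Loop continues while BOTH tests hold, so the trip count is the
  // minimum of the per-condition counts: min(16, 10) = 10.
  for (int i = 0; i != 16 && i != 10; ++i)
    ++Trips;
  std::printf("trips = %u\n", Trips); // prints 10
  return 0;
}

Note that neither test alone controls the exit, which is exactly why both recursive ComputeExitLimitFromCond calls pass IsSubExpr (via EitherMayExit).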
@@ -4464,7 +4486,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
   // With an icmp, it may be feasible to compute an exact backedge-taken count.
   // Proceed to the next level to examine the icmp.
   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
+    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
 
   // Check for a constant condition. These are normally stripped out by
   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4490,7 +4512,8 @@ ScalarEvolution::ExitLimit
 ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
                                           ICmpInst *ExitCond,
                                           BasicBlock *TBB,
-                                          BasicBlock *FBB) {
+                                          BasicBlock *FBB,
+                                          bool IsSubExpr) {
 
   // If the condition was exit on true, convert the condition to exit on false
   ICmpInst::Predicate Cond;
@@ -4542,7 +4565,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -4553,24 +4576,24 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
     break;
   }
   case ICmpInst::ICMP_SLT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_SGT: {
     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                    getNotSCEV(RHS), L, true);
+                                    getNotSCEV(RHS), L, true, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_ULT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_UGT: {
     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                    getNotSCEV(RHS), L, false);
+                                    getNotSCEV(RHS), L, false, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -5439,7 +5462,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 /// effectively V != 0. We know and take advantage of the fact that this
 /// expression only being used in a comparison by zero context.
 ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
   // If the value is a constant
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
     // If the value is already zero, the branch will execute zero times.
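The SGT/UGT cases above reuse HowManyLessThans through SCEV's "not" trick: getNotSCEV(X) is (-1 - X), and X > Y holds exactly when (-1 - X) < (-1 - Y), since negation reverses the order. A quick standalone sanity check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -3; X <= 3; ++X)
    for (int64_t Y = -3; Y <= 3; ++Y)
      // X > Y  <=>  (-1 - X) < (-1 - Y), the rewrite used for ICMP_SGT.
      assert((X > Y) == ((-1 - X) < (-1 - Y)));
  return 0;
}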
@@ -5537,19 +5560,20 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   }
 
   // If the recurrence is known not to wraparound, unsigned divide computes the
-  // back edge count. We know that the value will either become zero (and thus
-  // the loop terminates), that the loop will terminate through some other exit
-  // condition first, or that the loop has undefined behavior. This means
-  // we can't "miss" the exit value, even with nonunit stride.
+  // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
+  // that the value will either become zero (and thus the loop terminates), that
+  // the loop will terminate through some other exit condition first, or that
+  // the loop has undefined behavior. This means we can't "miss" the exit
+  // value, even with nonunit stride.
   //
-  // FIXME: Prove that loops always exhibits *acceptable* undefined
-  // behavior. Loops must exhibit defined behavior until a wrapped value is
-  // actually used. So the trip count computed by udiv could be smaller than the
-  // number of well-defined iterations.
-  if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
-    // FIXME: We really want an "isexact" bit for udiv.
+  // This is only valid for expressions that directly compute the loop exit. It
+  // is invalid for subexpressions in which the loop may exit through this
+  // branch even if this subexpression is false. In that case, the trip count
+  // computed by this udiv could be smaller than the number of well-defined
+  // iterations.
+  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
     return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-  }
+
   // Then, try to solve the above equation provided that Start is constant.
   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
     return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -6315,9 +6339,14 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
+///
+/// @param IsSubExpr is true when the LHS < RHS condition does not directly
+/// control the branch. In this case, we can only compute an iteration count
+/// for a subexpression that cannot overflow before evaluating true.
 ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned) {
+                                  const Loop *L, bool isSigned,
+                                  bool IsSubExpr) {
   // Only handle:  "ADDREC < LoopInvariant".
   if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
@@ -6326,10 +6355,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     return getCouldNotCompute();
 
   // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = isSigned ?
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
-
+  bool NoWrap = false;
+  if (!IsSubExpr) {
+    NoWrap = AddRec->getNoWrapFlags(
+      (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
+                          | SCEV::FlagNW));
+  }
  if (AddRec->isAffine()) {
    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
    const SCEV *Step = AddRec->getStepRecurrence(*this);
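To make the IsSubExpr caveat concrete, here is a hedged C++ illustration (work() and n are hypothetical) of a loop whose "i != n" test is only a subexpression of the exit condition:

#include <cstdio>

static bool work(unsigned i) { return i < 10; } // hypothetical side exit

int main() {
  const unsigned n = 7; // with step 3, i walks 0,3,6,9,... and skips 7
  bool ok = true;
  unsigned trips = 0;
  // The loop can leave via "ok" turning false while i != n still holds, so
  // the udiv shortcut applied to the "i != n" subexpression alone could
  // report a count smaller than the number of well-defined iterations.
  // This is the case the IsSubExpr guard in HowFarToZero rules out.
  for (unsigned i = 0; i != n && ok; i += 3) {
    ok = work(i);
    ++trips;
  }
  std::printf("trips = %u\n", trips);
  return 0;
}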
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 68e43b2..bbf3c3a 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -71,6 +71,7 @@ using namespace llvm;
 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
 // more convenient.
 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
 
 namespace {
   /// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -109,6 +110,97 @@ namespace {
       return CI->getValue()[0];
     }
   };
+
+  /// This is a simple wrapper around an MDNode which provides a
+  /// higher-level interface by hiding the details of how alias analysis
+  /// information is encoded in its operands.
+  class TBAAStructTagNode {
+    /// This node should be created with createTBAAStructTagNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTagNode() : Node(0) {}
+    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTagNode.
+    const MDNode *getNode() const { return Node; }
+
+    const MDNode *getBaseType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+    }
+    const MDNode *getAccessType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+    }
+    uint64_t getOffset() const {
+      return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+    }
+    /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+    /// objects which are not modified (by any means) in the context where this
+    /// AliasAnalysis is relevant.
+    bool TypeIsImmutable() const {
+      if (Node->getNumOperands() < 4)
+        return false;
+      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+      if (!CI)
+        return false;
+      return CI->getValue()[0];
+    }
+  };
+
+  /// This is a simple wrapper around an MDNode which provides a
+  /// higher-level interface by hiding the details of how alias analysis
+  /// information is encoded in its operands.
+  class TBAAStructTypeNode {
+    /// This node should be created with createTBAAStructTypeNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTypeNode() : Node(0) {}
+    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTypeNode.
+    const MDNode *getNode() const { return Node; }
+
+    /// Get this TBAAStructTypeNode's field in the type DAG with
+    /// given offset. Update the offset to be relative to the field type.
+    TBAAStructTypeNode getParent(uint64_t &Offset) const {
+      // Parent can be omitted for the root node.
+      if (Node->getNumOperands() < 2)
+        return TBAAStructTypeNode();
+
+      // Special handling for a scalar type node.
+      if (Node->getNumOperands() <= 3) {
+        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+        if (!P)
+          return TBAAStructTypeNode();
+        return TBAAStructTypeNode(P);
+      }
+
+      // Assume the offsets are in order. We return the previous field if
+      // the current offset is bigger than the given offset.
+      unsigned TheIdx = 0;
+      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+        uint64_t Cur =
+          cast<ConstantInt>(Node->getOperand(Idx + 1))->getZExtValue();
+        if (Cur > Offset) {
+          assert(Idx >= 3 &&
+                 "TBAAStructTypeNode::getParent should have an offset match!");
+          TheIdx = Idx - 2;
+          break;
+        }
+      }
+      // Move along the last field.
+      if (TheIdx == 0)
+        TheIdx = Node->getNumOperands() - 2;
+      uint64_t Cur =
+        cast<ConstantInt>(Node->getOperand(TheIdx + 1))->getZExtValue();
+      Offset -= Cur;
+      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+      if (!P)
+        return TBAAStructTypeNode();
+      return TBAAStructTypeNode(P);
+    }
+  };
 }
 
 namespace {
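The getParent() walk is the heart of the type DAG: at a struct node, pick the field whose start offset is the largest one not exceeding Offset, then rebase Offset to be relative to that field. A toy model (plain vectors of (type, startOffset) pairs instead of MDNode operands; illustrative only):

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

struct TypeNode {
  // Fields as (field type, start offset) pairs, sorted by start offset.
  std::vector<std::pair<const TypeNode *, uint64_t>> Fields;
};

// Step to the field containing Offset, rebasing Offset into that field.
const TypeNode *getParent(const TypeNode &N, uint64_t &Offset) {
  if (N.Fields.empty())
    return nullptr;                    // scalar node: nowhere to descend
  size_t Idx = N.Fields.size() - 1;    // default: offset lands in last field
  for (size_t I = 1; I < N.Fields.size(); ++I)
    if (N.Fields[I].second > Offset) { // first field starting past Offset...
      Idx = I - 1;                     // ...means the previous one covers it
      break;
    }
  Offset -= N.Fields[Idx].second;      // rebase into the field type
  return N.Fields[Idx].first;
}

int main() {
  TypeNode Int;                         // scalar
  TypeNode S{{{&Int, 0}, {&Int, 4}}};   // struct S { int a; int b; };
  uint64_t Off = 4;                     // access to S.b
  const TypeNode *F = getParent(S, Off);
  assert(F == &Int && Off == 0);        // rebased into the int field
  return 0;
}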
@@ -137,6 +229,7 @@
     }
 
     bool Aliases(const MDNode *A, const MDNode *B) const;
+    bool PathAliases(const MDNode *A, const MDNode *B) const;
 
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -171,6 +264,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 bool
 TypeBasedAliasAnalysis::Aliases(const MDNode *A,
                                 const MDNode *B) const {
+  if (EnableStructPathTBAA)
+    return PathAliases(A, B);
+
   // Keep track of the root node for A and B.
   TBAANode RootA, RootB;
 
@@ -209,6 +305,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A,
   return false;
 }
 
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool
+TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
+                                    const MDNode *B) const {
+  // Keep track of the root node for A and B.
+  TBAAStructTypeNode RootA, RootB;
+  TBAAStructTagNode TagA(A), TagB(B);
+
+  // TODO: We need to check if AccessType of TagA encloses AccessType of
+  // TagB to support aggregate AccessType. If yes, return true.
+
+  // Start from the base type of A, follow the edge with the correct offset in
+  // the type DAG and adjust the offset until we reach the base type of B or
+  // until we reach the Root node.
+  // Compare the adjusted offset once we have the same base.
+
+  // Climb the type DAG from base type of A to see if we reach base type of B.
+  const MDNode *BaseA = TagA.getBaseType();
+  const MDNode *BaseB = TagB.getBaseType();
+  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+  for (TBAAStructTypeNode T(BaseA); ; ) {
+    if (T.getNode() == BaseB)
+      // Base type of A encloses base type of B, check if the offsets match.
+      return OffsetA == OffsetB;
+
+    RootA = T;
+    // Follow the edge with the correct offset, OffsetA will be adjusted to
+    // be relative to the field type.
+    T = T.getParent(OffsetA);
+    if (!T.getNode())
+      break;
+  }
+
+  // Reset OffsetA and climb the type DAG from base type of B to see if we
+  // reach base type of A.
+  OffsetA = TagA.getOffset();
+  for (TBAAStructTypeNode T(BaseB); ; ) {
+    if (T.getNode() == BaseA)
+      // Base type of B encloses base type of A, check if the offsets match.
+      return OffsetA == OffsetB;
+
+    RootB = T;
+    // Follow the edge with the correct offset, OffsetB will be adjusted to
+    // be relative to the field type.
+    T = T.getParent(OffsetB);
+    if (!T.getNode())
+      break;
+  }
+
+  // Neither node is an ancestor of the other.
+
+  // If they have different roots, they're part of different potentially
+  // unrelated type systems, so we must be conservative.
+  if (RootA.getNode() != RootB.getNode())
+    return true;
+
+  // If they have the same root, then we've proved there's no alias.
+  return false;
+}
+
 AliasAnalysis::AliasResult
 TypeBasedAliasAnalysis::alias(const Location &LocA,
                               const Location &LocB) {
@@ -240,7 +397,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
 
   // If this is an "immutable" type, we can assume the pointer is pointing
   // to constant memory.
-  if (TBAANode(M).TypeIsImmutable())
+  if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+      (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
     return true;
 
   return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -256,7 +414,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
   // If this is an "immutable" type, we can assume the call doesn't write
   // to memory.
   if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
-    if (TBAANode(M).TypeIsImmutable())
+    if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+        (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
      Min = OnlyReadsMemory;
 
  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
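What PathAliases buys over the old scalar TBAA, in one example. For a struct with two int fields, the accesses get tags of the form (BaseType, AccessType, Offset) per the wrappers above; same base with different offsets yields NoAlias, where scalar TBAA would only see "int" vs "int" and stay conservative (hedged sketch; exact tag layout follows the wrappers, not any particular frontend):

struct S { int a; int b; };
// x.a carries tag (S, int, 0), x.b carries tag (S, int, 4).
// PathAliases: bases match immediately, offsets 0 != 4 -> no alias.

int disjoint(S &x) {
  x.a = 1;
  x.b = 2;   // under struct-path TBAA these two stores do not alias
  return x.a; // ...so this load can be forwarded from the first store
}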
@@ -298,3 +457,55 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
 
   return AliasAnalysis::getModRefInfo(CS1, CS2);
 }
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+  if (!A || !B)
+    return NULL;
+
+  if (A == B)
+    return A;
+
+  // For struct-path aware TBAA, we use the access type of the tag.
+  if (EnableStructPathTBAA) {
+    A = cast_or_null<MDNode>(A->getOperand(1));
+    if (!A) return 0;
+    B = cast_or_null<MDNode>(B->getOperand(1));
+    if (!B) return 0;
+  }
+
+  SmallVector<MDNode *, 4> PathA;
+  MDNode *T = A;
+  while (T) {
+    PathA.push_back(T);
+    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+  }
+
+  SmallVector<MDNode *, 4> PathB;
+  T = B;
+  while (T) {
+    PathB.push_back(T);
+    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+  }
+
+  int IA = PathA.size() - 1;
+  int IB = PathB.size() - 1;
+
+  MDNode *Ret = 0;
+  while (IA >= 0 && IB >= 0) {
+    if (PathA[IA] == PathB[IB])
+      Ret = PathA[IA];
+    else
+      break;
+    --IA;
+    --IB;
+  }
+  if (!EnableStructPathTBAA)
+    return Ret;
+
+  if (!Ret)
+    return 0;
+  // We need to convert from a type node to a tag node.
+  Type *Int64 = IntegerType::get(A->getContext(), 64);
+  Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
+  return MDNode::get(A->getContext(), Ops);
+}
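getMostGenericTBAA walks both parent chains up to the root and keeps the deepest common ancestor; merged loads/stores then carry that more generic tag. A toy version of the same prefix computation over leaf-to-root paths of node names (illustrative only; the real code walks MDNode parent operands):

#include <cassert>
#include <string>
#include <vector>

static std::string mostGeneric(const std::vector<std::string> &PathA,
                               const std::vector<std::string> &PathB) {
  // Paths are leaf-to-root; compare from the root ends inward.
  int IA = (int)PathA.size() - 1, IB = (int)PathB.size() - 1;
  std::string Ret; // empty = no common ancestor (different roots)
  while (IA >= 0 && IB >= 0 && PathA[IA] == PathB[IB]) {
    Ret = PathA[IA];
    --IA;
    --IB;
  }
  return Ret;
}

int main() {
  // "int" and "float" both descend from "scalar" under the same root, so
  // merging an int-tagged and a float-tagged access yields the "scalar" tag.
  assert(mostGeneric({"int", "scalar", "root"},
                     {"float", "scalar", "root"}) == "scalar");
  return 0;
}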