path: root/lib/Analysis
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp                   23
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp            66
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp         108
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp                 53
-rw-r--r--  lib/Analysis/Analysis.cpp                         1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp             411
-rw-r--r--  lib/Analysis/CFGPrinter.cpp                      68
-rw-r--r--  lib/Analysis/CMakeLists.txt                      11
-rw-r--r--  lib/Analysis/CaptureTracking.cpp                  2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp                463
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp                   9
-rw-r--r--  lib/Analysis/DebugInfo.cpp                     1199
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp                  168
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp                  102
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp           295
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp               11
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp                9
-rw-r--r--  lib/Analysis/IVUsers.cpp                         57
-rw-r--r--  lib/Analysis/InlineCost.cpp                     338
-rw-r--r--  lib/Analysis/InstCount.cpp                        9
-rw-r--r--  lib/Analysis/Interval.cpp                        21
-rw-r--r--  lib/Analysis/IntervalPartition.cpp                2
-rw-r--r--  lib/Analysis/LibCallAliasAnalysis.cpp             2
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp                 5
-rw-r--r--  lib/Analysis/LoopDependenceAnalysis.cpp         279
-rw-r--r--  lib/Analysis/LoopInfo.cpp                       366
-rw-r--r--  lib/Analysis/LoopPass.cpp                        60
-rw-r--r--  lib/Analysis/MallocHelper.cpp                   230
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp       125
-rw-r--r--  lib/Analysis/PointerTracking.cpp                265
-rw-r--r--  lib/Analysis/PostDominators.cpp                  10
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp           310
-rw-r--r--  lib/Analysis/ProfileInfo.cpp                    166
-rw-r--r--  lib/Analysis/ProfileInfoLoader.cpp              193
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp          237
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp            343
-rw-r--r--  lib/Analysis/README.txt                          18
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp               2109
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp   133
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp        482
-rw-r--r--  lib/Analysis/SparsePropagation.cpp               30
-rw-r--r--  lib/Analysis/Trace.cpp                           10
-rw-r--r--  lib/Analysis/ValueTracking.cpp                  167
43 files changed, 6507 insertions, 2459 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index c5523ec..c456990 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -88,7 +88,7 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
- return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()),
+ return alias(L->getOperand(0), getTypeStoreSize(L->getType()),
P, Size) ? Ref : NoModRef;
}
@@ -97,7 +97,7 @@ AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
// If the stored address cannot alias the pointer in question, then the
// pointer cannot be modified by the store.
if (!alias(S->getOperand(1),
- TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+ getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
@@ -177,18 +177,23 @@ AliasAnalysis::~AliasAnalysis() {}
/// AliasAnalysis interface before any other methods are called.
///
void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
- TD = &P->getAnalysis<TargetData>();
+ TD = P->getAnalysisIfAvailable<TargetData>();
AA = &P->getAnalysis<AliasAnalysis>();
}
// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that
-// TargetData is required by the pass.
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>(); // All AA's need TargetData.
AU.addRequired<AliasAnalysis>(); // All AA's chain
}
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : ~0u;
+}
+
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the value pointed to by Ptr.
///
@@ -228,13 +233,15 @@ bool llvm::isNoAliasCall(const Value *V) {
/// isIdentifiedObject - Return true if this pointer refers to a distinct and
/// identifiable object. This returns true for:
-/// Global Variables and Functions
+/// Global Variables and Functions (but not Global Aliases)
/// Allocas and Mallocs
/// ByVal and NoAlias Arguments
/// NoAlias returns
///
bool llvm::isIdentifiedObject(const Value *V) {
- if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isNoAliasCall(V))
+ if (isa<AllocationInst>(V) || isNoAliasCall(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
return true;
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasNoAliasAttr() || A->hasByValAttr();
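The substantive change above is that TargetData becomes optional for alias
analyses: getAnalysisIfAvailable<TargetData>() may return null, and the new
getTypeStoreSize() falls back to ~0u ("unknown size") in that case. A minimal
sketch of the same pattern, mirroring the hunk above (the free-standing helper
name is hypothetical, not part of the patch):

    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // With no TargetData registered, report the size as unknown (~0u);
    // alias queries already treat ~0u as a maximally conservative size.
    static unsigned storeSizeOrUnknown(const TargetData *TD, const Type *Ty) {
      return TD ? TD->getTypeStoreSize(Ty) : ~0u;
    }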
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 4362d7d..272c871 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -18,11 +18,12 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
PrintAll("count-aa-print-all-queries", cl::ReallyHidden);
static cl::opt<bool>
PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
@@ -41,33 +42,33 @@ namespace {
}
void printLine(const char *Desc, unsigned Val, unsigned Sum) {
- cerr << " " << Val << " " << Desc << " responses ("
- << Val*100/Sum << "%)\n";
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
}
~AliasAnalysisCounter() {
unsigned AASum = No+May+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
- cerr << "\n===== Alias Analysis Counter Report =====\n"
- << " Analysis counted: " << Name << "\n"
- << " " << AASum << " Total Alias Queries Performed\n";
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted: " << Name << "\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
if (AASum) {
printLine("no alias", No, AASum);
printLine("may alias", May, AASum);
printLine("must alias", Must, AASum);
- cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
- << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
}
- cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
if (MRSum) {
printLine("no mod/ref", NoMR, MRSum);
printLine("ref", JustRef, MRSum);
printLine("mod", JustMod, MRSum);
printLine("mod/ref", MR, MRSum);
- cerr << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum<< "%/"
- << JustRef*100/MRSum << "%/" << JustMod*100/MRSum << "%/"
- << MR*100/MRSum <<"%\n\n";
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
}
}
}
@@ -89,19 +90,6 @@ namespace {
bool pointsToConstantMemory(const Value *P) {
return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
}
- bool doesNotAccessMemory(CallSite CS) {
- return getAnalysis<AliasAnalysis>().doesNotAccessMemory(CS);
- }
- bool doesNotAccessMemory(Function *F) {
- return getAnalysis<AliasAnalysis>().doesNotAccessMemory(F);
- }
- bool onlyReadsMemory(CallSite CS) {
- return getAnalysis<AliasAnalysis>().onlyReadsMemory(CS);
- }
- bool onlyReadsMemory(Function *F) {
- return getAnalysis<AliasAnalysis>().onlyReadsMemory(F);
- }
-
// Forwarding functions: just delegate to a real AA implementation, counting
// the number of responses...
@@ -131,20 +119,20 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
const char *AliasString;
switch (R) {
- default: assert(0 && "Unknown alias type!");
+ default: llvm_unreachable("Unknown alias type!");
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
case MustAlias: Must++; AliasString = "Must alias"; break;
}
if (PrintAll || (PrintAllFailures && R == MayAlias)) {
- cerr << AliasString << ":\t";
- cerr << "[" << V1Size << "B] ";
- WriteAsOperand(*cerr.stream(), V1, true, M);
- cerr << ", ";
- cerr << "[" << V2Size << "B] ";
- WriteAsOperand(*cerr.stream(), V2, true, M);
- cerr << "\n";
+ errs() << AliasString << ":\t";
+ errs() << "[" << V1Size << "B] ";
+ WriteAsOperand(errs(), V1, true, M);
+ errs() << ", ";
+ errs() << "[" << V2Size << "B] ";
+ WriteAsOperand(errs(), V2, true, M);
+ errs() << "\n";
}
return R;
@@ -156,7 +144,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
const char *MRString;
switch (R) {
- default: assert(0 && "Unknown mod/ref type!");
+ default: llvm_unreachable("Unknown mod/ref type!");
case NoModRef: NoMR++; MRString = "NoModRef"; break;
case Ref: JustRef++; MRString = "JustRef"; break;
case Mod: JustMod++; MRString = "JustMod"; break;
@@ -164,10 +152,10 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
}
if (PrintAll || (PrintAllFailures && R == ModRef)) {
- cerr << MRString << ": Ptr: ";
- cerr << "[" << Size << "B] ";
- WriteAsOperand(*cerr.stream(), P, true, M);
- cerr << "\t<->" << *CS.getInstruction();
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Size << "B] ";
+ WriteAsOperand(errs(), P, true, M);
+ errs() << "\t<->" << *CS.getInstruction();
}
return R;
}
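The assert(0) -> llvm_unreachable swaps in this file are not purely cosmetic:
assert compiles away under NDEBUG, letting an unexpected enum value fall
through silently, while llvm_unreachable still aborts in release builds and
marks the path dead for the optimizer. A small sketch of that difference (the
name() helper below is hypothetical):

    #include "llvm/Support/ErrorHandling.h"

    static const char *name(int R) {
      switch (R) {
      case 0: return "No alias";
      case 1: return "May alias";
      case 2: return "Must alias";
      // Unlike assert(0), this still traps when NDEBUG is defined.
      default: llvm_unreachable("Unknown alias type!");
      }
    }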
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 07820e3..bb95c01 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -29,9 +29,8 @@
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
-#include <set>
-#include <sstream>
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
using namespace llvm;
static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
@@ -81,18 +80,21 @@ X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true);
FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
-static void PrintResults(const char *Msg, bool P, const Value *V1, const Value *V2,
- const Module *M) {
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
if (P) {
- std::stringstream s1, s2;
- WriteAsOperand(s1, V1, true, M);
- WriteAsOperand(s2, V2, true, M);
- std::string o1(s1.str()), o2(s2.str());
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
if (o2 < o1)
- std::swap(o1, o2);
- cerr << " " << Msg << ":\t"
- << o1 << ", "
- << o2 << "\n";
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
}
}
@@ -100,19 +102,17 @@ static inline void
PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
Module *M) {
if (P) {
- cerr << " " << Msg << ": Ptr: ";
- WriteAsOperand(*cerr.stream(), Ptr, true, M);
- cerr << "\t<->" << *I;
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
}
}
bool AAEval::runOnFunction(Function &F) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- const TargetData &TD = AA.getTargetData();
-
- std::set<Value *> Pointers;
- std::set<CallSite> CallSites;
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
if (isa<PointerType>(I->getType())) // Add all pointer arguments
@@ -136,20 +136,20 @@ bool AAEval::runOnFunction(Function &F) {
if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
PrintNoModRef || PrintMod || PrintRef || PrintModRef)
- cerr << "Function: " << F.getName() << ": " << Pointers.size()
- << " pointers, " << CallSites.size() << " call sites\n";
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
- for (std::set<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- unsigned I1Size = 0;
+ unsigned I1Size = ~0u;
const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
- for (std::set<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- unsigned I2Size = 0;
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ unsigned I2Size = ~0u;
const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy);
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
case AliasAnalysis::NoAlias:
@@ -162,21 +162,21 @@ bool AAEval::runOnFunction(Function &F) {
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
++MustAlias; break;
default:
- cerr << "Unknown alias query result!\n";
+ errs() << "Unknown alias query result!\n";
}
}
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (std::set<CallSite>::iterator C = CallSites.begin(),
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
Ce = CallSites.end(); C != Ce; ++C) {
Instruction *I = C->getInstruction();
- for (std::set<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
- unsigned Size = 0;
+ unsigned Size = ~0u;
const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
- if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy);
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
case AliasAnalysis::NoModRef:
@@ -192,7 +192,7 @@ bool AAEval::runOnFunction(Function &F) {
PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
default:
- cerr << "Unknown alias query result!\n";
+ errs() << "Unknown alias query result!\n";
}
}
}
@@ -201,45 +201,45 @@ bool AAEval::runOnFunction(Function &F) {
}
static void PrintPercent(unsigned Num, unsigned Sum) {
- cerr << "(" << Num*100ULL/Sum << "."
- << ((Num*1000ULL/Sum) % 10) << "%)\n";
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
}
bool AAEval::doFinalization(Module &M) {
unsigned AliasSum = NoAlias + MayAlias + MustAlias;
- cerr << "===== Alias Analysis Evaluator Report =====\n";
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
if (AliasSum == 0) {
- cerr << " Alias Analysis Evaluator Summary: No pointers!\n";
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
} else {
- cerr << " " << AliasSum << " Total Alias Queries Performed\n";
- cerr << " " << NoAlias << " no alias responses ";
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
PrintPercent(NoAlias, AliasSum);
- cerr << " " << MayAlias << " may alias responses ";
+ errs() << " " << MayAlias << " may alias responses ";
PrintPercent(MayAlias, AliasSum);
- cerr << " " << MustAlias << " must alias responses ";
+ errs() << " " << MustAlias << " must alias responses ";
PrintPercent(MustAlias, AliasSum);
- cerr << " Alias Analysis Evaluator Pointer Alias Summary: "
- << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
- << MustAlias*100/AliasSum << "%\n";
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
}
// Display the summary for mod/ref analysis
unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
if (ModRefSum == 0) {
- cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
} else {
- cerr << " " << ModRefSum << " Total ModRef Queries Performed\n";
- cerr << " " << NoModRef << " no mod/ref responses ";
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
PrintPercent(NoModRef, ModRefSum);
- cerr << " " << Mod << " mod responses ";
+ errs() << " " << Mod << " mod responses ";
PrintPercent(Mod, ModRefSum);
- cerr << " " << Ref << " ref responses ";
+ errs() << " " << Ref << " ref responses ";
PrintPercent(Ref, ModRefSum);
- cerr << " " << ModRef << " mod & ref responses ";
+ errs() << " " << ModRef << " mod & ref responses ";
PrintPercent(ModRef, ModRefSum);
- cerr << " Alias Analysis Evaluator Mod/Ref Summary: "
- << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
- << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
}
return false;
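Replacing std::set with SetVector here is what makes aa-eval output stable:
SetVector deduplicates like a set but iterates in insertion order, so the
pointer and call-site worklists are walked the same way on every run. A
standalone sketch of that property (not from the patch):

    #include "llvm/ADT/SetVector.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      SetVector<int> Worklist;
      Worklist.insert(3);
      Worklist.insert(1);
      Worklist.insert(3);    // duplicate: ignored
      for (SetVector<int>::iterator I = Worklist.begin(), E = Worklist.end();
           I != E; ++I)
        errs() << *I << ' '; // always prints "3 1", never "1 3"
      errs() << '\n';
      return 0;
    }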
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 18c2b665..b056d00 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -20,8 +20,10 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// mergeSetIn - Merge the specified alias set into this alias set.
@@ -186,8 +188,8 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
- for (DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.begin(),
- E = PointerMap.end(); I != E; ++I)
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
I->second->eraseFromList();
PointerMap.clear();
@@ -279,7 +281,7 @@ bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
bool AliasSetTracker::add(LoadInst *LI) {
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
- AA.getTargetData().getTypeStoreSize(LI->getType()),
+ AA.getTypeStoreSize(LI->getType()),
AliasSet::Refs, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -289,7 +291,7 @@ bool AliasSetTracker::add(StoreInst *SI) {
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
- AA.getTargetData().getTypeStoreSize(Val->getType()),
+ AA.getTypeStoreSize(Val->getType()),
AliasSet::Mods, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -411,7 +413,7 @@ bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
}
bool AliasSetTracker::remove(LoadInst *LI) {
- unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType());
+ unsigned Size = AA.getTypeStoreSize(LI->getType());
AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
if (!AS) return false;
remove(*AS);
@@ -419,8 +421,7 @@ bool AliasSetTracker::remove(LoadInst *LI) {
}
bool AliasSetTracker::remove(StoreInst *SI) {
- unsigned Size =
- AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType());
+ unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
if (!AS) return false;
remove(*AS);
@@ -485,7 +486,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AS->removeCallSite(CS);
// First, look up the PointerRec for this pointer.
- DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(PtrVal);
+ PointerMapType::iterator I = PointerMap.find(PtrVal);
if (I == PointerMap.end()) return; // Noop
// If we found one, remove the pointer from the alias set it is in.
@@ -511,7 +512,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
AA.copyValue(From, To);
// First, look up the PointerRec for this pointer.
- DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(From);
+ PointerMapType::iterator I = PointerMap.find(From);
if (I == PointerMap.end())
return; // Noop
assert(I->second->hasAliasSet() && "Dead entry?");
@@ -531,15 +532,15 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
// AliasSet/AliasSetTracker Printing Support
//===----------------------------------------------------------------------===//
-void AliasSet::print(std::ostream &OS) const {
- OS << " AliasSet[" << (void*)this << "," << RefCount << "] ";
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] ";
OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
switch (AccessTy) {
case NoModRef: OS << "No access "; break;
case Refs : OS << "Ref "; break;
case Mods : OS << "Mod "; break;
case ModRef : OS << "Mod/Ref "; break;
- default: assert(0 && "Bad value for AccessTy!");
+ default: llvm_unreachable("Bad value for AccessTy!");
}
if (isVolatile()) OS << "[volatile] ";
if (Forward)
@@ -564,7 +565,7 @@ void AliasSet::print(std::ostream &OS) const {
OS << "\n";
}
-void AliasSetTracker::print(std::ostream &OS) const {
+void AliasSetTracker::print(raw_ostream &OS) const {
OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
<< PointerMap.size() << " pointer values.\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
@@ -572,8 +573,26 @@ void AliasSetTracker::print(std::ostream &OS) const {
OS << "\n";
}
-void AliasSet::dump() const { print (cerr); }
-void AliasSetTracker::dump() const { print(cerr); }
+void AliasSet::dump() const { print(errs()); }
+void AliasSetTracker::dump() const { print(errs()); }
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
//===----------------------------------------------------------------------===//
// AliasSetPrinter Pass
@@ -596,7 +615,7 @@ namespace {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
- Tracker->print(cerr);
+ Tracker->print(errs());
delete Tracker;
return false;
}
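The new ASTCallbackVH is the standard CallbackVH idiom: the tracker's pointer
map is keyed by value handles whose deleted() hook removes the entry the
moment the underlying Value is destroyed, so the map can never hold a stale
pointer. A minimal sketch of the idiom for a hypothetical side table (TableVH
is an illustration, not part of the patch):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    class TableVH : public CallbackVH {
      DenseMap<Value*, unsigned> *Table; // not owned
    public:
      TableVH(Value *V, DenseMap<Value*, unsigned> *T)
        : CallbackVH(V), Table(T) {}
      virtual void deleted() {
        Table->erase(getValPtr()); // drop the entry as V is destroyed
        // As in ASTCallbackVH::deleted() above, 'this' now dangles.
      }
    };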
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 493c6e8..f8cb323 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -9,7 +9,6 @@
#include "llvm-c/Analysis.h"
#include "llvm/Analysis/Verifier.h"
-#include <fstream>
#include <cstring>
using namespace llvm;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f689dca..2c4efc4 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -22,11 +23,15 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
using namespace llvm;
@@ -35,12 +40,8 @@ using namespace llvm;
// Useful predicates
//===----------------------------------------------------------------------===//
-static const User *isGEP(const Value *V) {
- if (isa<GetElementPtrInst>(V) ||
- (isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::GetElementPtr))
- return cast<User>(V);
- return 0;
+static const GEPOperator *isGEP(const Value *V) {
+ return dyn_cast<GEPOperator>(V);
}
static const Value *GetGEPOperands(const Value *V,
@@ -103,7 +104,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
- const TargetData &TD) {
+ LLVMContext &Context, const TargetData &TD) {
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
AccessTy = GV->getType()->getElementType();
@@ -112,6 +113,12 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
AccessTy = AI->getType()->getElementType();
else
return false;
+ } else if (const CallInst* CI = extractMallocCall(V)) {
+ if (!isArrayMalloc(V, Context, &TD))
+ // The size is the argument to the malloc call.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ return (C->getZExtValue() < Size);
+ return false;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
if (A->hasByValAttr())
AccessTy = cast<PointerType>(A->getType())->getElementType();
@@ -142,11 +149,10 @@ namespace {
explicit NoAA(void *PID) : ImmutablePass(PID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
}
virtual void initializePass() {
- TD = &getAnalysis<TargetData>();
+ TD = getAnalysisIfAvailable<TargetData>();
}
virtual AliasResult alias(const Value *V1, unsigned V1Size,
@@ -156,7 +162,7 @@ namespace {
virtual void getArgumentAccesses(Function *F, CallSite CS,
std::vector<PointerAccessInfo> &Info) {
- assert(0 && "This method may not be called on this function!");
+ llvm_unreachable("This method may not be called on this function!");
}
virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
@@ -196,7 +202,12 @@ namespace {
static char ID; // Class identification, replacement for typeinfo
BasicAliasAnalysis() : NoAA(&ID) {}
AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ const Value *V2, unsigned V2Size) {
+ assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!");
+ AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+ VisitedPHIs.clear();
+ return Alias;
+ }
ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
@@ -210,6 +221,22 @@ namespace {
bool pointsToConstantMemory(const Value *P);
private:
+ // VisitedPHIs - Track PHI nodes visited by an aliasCheck() call.
+ SmallSet<const PHINode*, 16> VisitedPHIs;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+ // against another.
+ AliasResult aliasGEP(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+ // against another.
+ AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
+ const Value *V2, unsigned V2Size);
+
+ AliasResult aliasCheck(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
// CheckGEPInstructions - Check two GEP instructions with known
// must-aliasing base pointers. This checks to see if the index expressions
// preclude the pointers from aliasing...
@@ -279,6 +306,27 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
if (!passedAsArg)
return NoModRef;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (alias(II->getOperand(1), Size, P, Size) == NoAlias)
+ return NoModRef;
+ break;
+ }
+ }
}
// The AliasAnalysis base class has some smarts, lets use them.
@@ -303,71 +351,12 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
return NoAA::getModRefInfo(CS1, CS2);
}
-
-// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such
-// as array references.
+// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+// against another.
//
AliasAnalysis::AliasResult
-BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- // Strip off any constant expression casts if they exist
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V1))
- if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
- V1 = CE->getOperand(0);
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V2))
- if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
- V2 = CE->getOperand(0);
-
- // Are we checking for alias of the same value?
- if (V1 == V2) return MustAlias;
-
- if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
- return NoAlias; // Scalars cannot alias each other
-
- // Strip off cast instructions. Since V1 and V2 are pointers, they must be
- // pointer<->pointer bitcasts.
- if (const BitCastInst *I = dyn_cast<BitCastInst>(V1))
- return alias(I->getOperand(0), V1Size, V2, V2Size);
- if (const BitCastInst *I = dyn_cast<BitCastInst>(V2))
- return alias(V1, V1Size, I->getOperand(0), V2Size);
-
- // Figure out what objects these things are pointing to if we can.
- const Value *O1 = V1->getUnderlyingObject();
- const Value *O2 = V2->getUnderlyingObject();
-
- if (O1 != O2) {
- // If V1/V2 point to two different objects we know that we have no alias.
- if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
- return NoAlias;
-
- // Arguments can't alias with local allocations or noalias calls.
- if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
- (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
- return NoAlias;
-
- // Most objects can't alias null.
- if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
- (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
- return NoAlias;
- }
-
- // If the size of one access is larger than the entire object on the other
- // side, then we know such behavior is undefined and can assume no alias.
- const TargetData &TD = getTargetData();
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD)))
- return NoAlias;
-
- // If one pointer is the result of a call/invoke and the other is a
- // non-escaping local object, then we know the object couldn't escape to a
- // point where the call could return it.
- if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
- isNonEscapingLocalObject(O2) && O1 != O2)
- return NoAlias;
- if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
- isNonEscapingLocalObject(O1) && O1 != O2)
- return NoAlias;
-
+BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
// If we have two gep instructions with must-alias'ing base pointers, figure
// out if the indexes to the GEP tell us anything about the derived pointer.
// Note that we also handle chains of getelementptr instructions as well as
@@ -387,8 +376,8 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() &&
// All operands are the same, ignoring the base.
std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1))
- return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size);
-
+ return aliasCheck(GEP1->getOperand(0), V1Size,
+ GEP2->getOperand(0), V2Size);
// Drill down into the first non-gep value, to test for must-aliasing of
// the base pointers.
@@ -405,7 +394,7 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
const Value *BasePtr2 = GEP2->getOperand(0);
// Do the base pointers alias?
- AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U);
+ AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U);
if (BaseAlias == NoAlias) return NoAlias;
if (BaseAlias == MustAlias) {
// If the base pointers alias each other exactly, check to see if we can
@@ -435,79 +424,190 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
// instruction. If one pointer is a GEP with a non-zero index of the other
// pointer, we know they cannot alias.
//
- if (isGEP(V2)) {
- std::swap(V1, V2);
- std::swap(V1Size, V2Size);
- }
+ if (V1Size == ~0U || V2Size == ~0U)
+ return MayAlias;
- if (V1Size != ~0U && V2Size != ~0U)
- if (isGEP(V1)) {
- SmallVector<Value*, 16> GEPOperands;
- const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
-
- AliasResult R = alias(BasePtr, V1Size, V2, V2Size);
- if (R == MustAlias) {
- // If there is at least one non-zero constant index, we know they cannot
- // alias.
- bool ConstantFound = false;
- bool AllZerosFound = true;
- for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
- if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
- if (!C->isNullValue()) {
- ConstantFound = true;
- AllZerosFound = false;
- break;
- }
- } else {
- AllZerosFound = false;
- }
+ SmallVector<Value*, 16> GEPOperands;
+ const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
+
+ AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size);
+ if (R != MustAlias)
+ // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+ // If V2 is known not to alias the GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ // If there is at least one non-zero constant index, we know they cannot
+ // alias.
+ bool ConstantFound = false;
+ bool AllZerosFound = true;
+ for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
+ if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
+ if (!C->isNullValue()) {
+ ConstantFound = true;
+ AllZerosFound = false;
+ break;
+ }
+ } else {
+ AllZerosFound = false;
+ }
- // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases
- // the ptr, the end result is a must alias also.
- if (AllZerosFound)
- return MustAlias;
+ // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases
+ // the ptr, the end result is a must alias also.
+ if (AllZerosFound)
+ return MustAlias;
- if (ConstantFound) {
- if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
- return NoAlias;
+ if (ConstantFound) {
+ if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
+ return NoAlias;
- // Otherwise we have to check to see that the distance is more than
- // the size of the argument... build an index vector that is equal to
- // the arguments provided, except substitute 0's for any variable
- // indexes we find...
- if (cast<PointerType>(
- BasePtr->getType())->getElementType()->isSized()) {
- for (unsigned i = 0; i != GEPOperands.size(); ++i)
- if (!isa<ConstantInt>(GEPOperands[i]))
- GEPOperands[i] =
- Constant::getNullValue(GEPOperands[i]->getType());
- int64_t Offset =
- getTargetData().getIndexedOffset(BasePtr->getType(),
- &GEPOperands[0],
- GEPOperands.size());
-
- if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
- return NoAlias;
- }
- }
- }
+ // Otherwise we have to check to see that the distance is more than
+ // the size of the argument... build an index vector that is equal to
+ // the arguments provided, except substitute 0's for any variable
+ // indexes we find...
+ if (TD &&
+ cast<PointerType>(BasePtr->getType())->getElementType()->isSized()) {
+ for (unsigned i = 0; i != GEPOperands.size(); ++i)
+ if (!isa<ConstantInt>(GEPOperands[i]))
+ GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType());
+ int64_t Offset = TD->getIndexedOffset(BasePtr->getType(),
+ &GEPOperands[0],
+ GEPOperands.size());
+
+ if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
+ return NoAlias;
}
+ }
+
+ return MayAlias;
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
+ const Value *V2, unsigned V2Size) {
+ // The PHI node has already been visited; avoid recursing any further.
+ if (!VisitedPHIs.insert(PN))
+ return MayAlias;
+
+ SmallSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the sources is itself a PHI, return MayAlias conservatively
+ // to avoid compile-time explosion. The worst possible case is if both
+ // sides are PHI nodes, in which case the check is O(m x n) time, where
+ // 'm' and 'n' are the numbers of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V1Srcs[0], PNSize, V2, V2Size);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then return
+ // NoAlias / MustAlias. Otherwise, return MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+ AliasResult ThisAlias = aliasCheck(V, PNSize, V2, V2Size);
+ if (ThisAlias != Alias || ThisAlias == MayAlias)
+ return MayAlias;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = V1->getUnderlyingObject();
+ const Value *O2 = V2->getUnderlyingObject();
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls.
+ if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ LLVMContext &Context = V1->getContext();
+ if (TD)
+ if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) ||
+ (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke and the other is a
+ // non-escaping local object, then we know the object couldn't escape to a
+ // point where the call could return it.
+ if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
+ isNonEscapingLocalObject(O2) && O1 != O2)
+ return NoAlias;
+ if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
+ isNonEscapingLocalObject(O1) && O1 != O2)
+ return NoAlias;
+
+ if (!isGEP(V1) && isGEP(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (isGEP(V1))
+ return aliasGEP(V1, V1Size, V2, V2Size);
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1))
+ return aliasPHI(PN, V1Size, V2, V2Size);
return MayAlias;
}
// This function is used to determine if the indices of two GEP instructions are
// equal. V1 and V2 are the indices.
-static bool IndexOperandsEqual(Value *V1, Value *V2) {
+static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) {
if (V1->getType() == V2->getType())
return V1 == V2;
if (Constant *C1 = dyn_cast<Constant>(V1))
if (Constant *C2 = dyn_cast<Constant>(V2)) {
// Sign extend the constants to long types, if necessary
- if (C1->getType() != Type::Int64Ty)
- C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
- if (C2->getType() != Type::Int64Ty)
- C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+ if (C1->getType() != Type::getInt64Ty(Context))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
+ if (C2->getType() != Type::getInt64Ty(Context))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
return C1 == C2;
}
return false;
@@ -528,6 +628,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
+ LLVMContext &Context = GEPPointerTy->getContext();
+
// Find the (possibly empty) initial sequence of equal values... which are not
// necessarily constants.
unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
@@ -535,7 +637,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
unsigned UnequalOper = 0;
while (UnequalOper != MinOperands &&
- IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
+ IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper],
+ Context)) {
// Advance through the type as we go...
++UnequalOper;
if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
@@ -599,10 +702,10 @@ BasicAliasAnalysis::CheckGEPInstructions(
if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
if (G1OC->getType() != G2OC->getType()) {
// Sign extend both operands to long.
- if (G1OC->getType() != Type::Int64Ty)
- G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty);
- if (G2OC->getType() != Type::Int64Ty)
- G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty);
+ if (G1OC->getType() != Type::getInt64Ty(Context))
+ G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context));
+ if (G2OC->getType() != Type::getInt64Ty(Context))
+ G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context));
GEP1Ops[FirstConstantOper] = G1OC;
GEP2Ops[FirstConstantOper] = G2OC;
}
@@ -673,6 +776,10 @@ BasicAliasAnalysis::CheckGEPInstructions(
// However, one GEP may have more operands than the other. If this is the
// case, there may still be hope. Check this now.
if (FirstConstantOper == MinOperands) {
+ // Without TargetData, we won't know what the offsets are.
+ if (!TD)
+ return MayAlias;
+
// Make GEP1Ops be the longer one if there is a longer one.
if (NumGEP1Ops < NumGEP2Ops) {
std::swap(GEP1Ops, GEP2Ops);
@@ -692,13 +799,12 @@ BasicAliasAnalysis::CheckGEPInstructions(
GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType());
// Okay, now get the offset. This is the relative offset for the full
// instruction.
- const TargetData &TD = getTargetData();
- int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
- NumGEP1Ops);
+ int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
+ NumGEP1Ops);
// Now check without any constants at the end.
- int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
- MinOperands);
+ int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
+ MinOperands);
// Make sure we compare the absolute difference.
if (Offset1 > Offset2)
@@ -734,7 +840,8 @@ BasicAliasAnalysis::CheckGEPInstructions(
const Type *ZeroIdxTy = GEPPointerTy;
for (unsigned i = 0; i != FirstConstantOper; ++i) {
if (!isa<StructType>(ZeroIdxTy))
- GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty);
+ GEP1Ops[i] = GEP2Ops[i] =
+ Constant::getNullValue(Type::getInt32Ty(Context));
if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
@@ -775,9 +882,13 @@ BasicAliasAnalysis::CheckGEPInstructions(
// value possible.
//
if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
- GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1);
+ GEP1Ops[i] =
+ ConstantInt::get(Type::getInt64Ty(Context),
+ AT->getNumElements()-1);
else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
- GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1);
+ GEP1Ops[i] =
+ ConstantInt::get(Type::getInt64Ty(Context),
+ VT->getNumElements()-1);
}
}
@@ -812,11 +923,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
}
}
- if (GEPPointerTy->getElementType()->isSized()) {
+ if (TD && GEPPointerTy->getElementType()->isSized()) {
int64_t Offset1 =
- getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
+ TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
int64_t Offset2 =
- getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
+ TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
assert(Offset1 != Offset2 &&
"There is at least one different constant here!");
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 8ada5a3..6fed400 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -25,38 +25,36 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Config/config.h"
-#include <iosfwd>
-#include <sstream>
-#include <fstream>
using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
static std::string getGraphName(const Function *F) {
- return "CFG for '" + F->getName() + "' function";
+ return "CFG for '" + F->getNameStr() + "' function";
}
static std::string getNodeLabel(const BasicBlock *Node,
const Function *Graph,
bool ShortNames) {
if (ShortNames && !Node->getName().empty())
- return Node->getName() + ":";
+ return Node->getNameStr() + ":";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
- std::ostringstream Out;
if (ShortNames) {
- WriteAsOperand(Out, Node, false);
- return Out.str();
+ WriteAsOperand(OS, Node, false);
+ return OS.str();
}
if (Node->getName().empty()) {
- WriteAsOperand(Out, Node, false);
- Out << ":";
+ WriteAsOperand(OS, Node, false);
+ OS << ":";
}
-
- Out << *Node;
- std::string OutStr = Out.str();
+
+ OS << *Node;
+ std::string OutStr = OS.str();
if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
// Process string output to make it nicer...
@@ -94,7 +92,7 @@ namespace {
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -112,11 +110,11 @@ namespace {
CFGOnlyViewer() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- F.viewCFG();
+ F.viewCFGOnly();
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -136,19 +134,21 @@ namespace {
explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getName() + ".dot";
- cerr << "Writing '" << Filename << "'...";
- std::ofstream File(Filename.c_str());
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
- if (File.good())
+ if (ErrorInfo.empty())
WriteGraph(File, (const Function*)&F);
else
- cerr << " error opening file for writing!";
- cerr << "\n";
+ errs() << " error opening file for writing!";
+ errs() << "\n";
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -166,18 +166,20 @@ namespace {
CFGOnlyPrinter() : FunctionPass(&ID) {}
explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getName() + ".dot";
- cerr << "Writing '" << Filename << "'...";
- std::ofstream File(Filename.c_str());
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
- if (File.good())
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
WriteGraph(File, (const Function*)&F, true);
else
- cerr << " error opening file for writing!";
- cerr << "\n";
+ errs() << " error opening file for writing!";
+ errs() << "\n";
return false;
}
- void print(std::ostream &OS, const Module* = 0) const {}
+ void print(raw_ostream &OS, const Module* = 0) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -196,7 +198,7 @@ P2("dot-cfg-only",
/// being a 'dot' and 'gv' program in your path.
///
void Function::viewCFG() const {
- ViewGraph(this, "cfg" + getName());
+ ViewGraph(this, "cfg" + getNameStr());
}
/// viewCFGOnly - This function is meant for use from the debugger. It works
@@ -205,7 +207,7 @@ void Function::viewCFG() const {
/// his can make the graph smaller.
///
void Function::viewCFGOnly() const {
- ViewGraph(this, "cfg" + getName(), true);
+ ViewGraph(this, "cfg" + getNameStr(), true);
}
FunctionPass *llvm::createCFGPrinterPass () {
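All three printer passes above move from std::ofstream to raw_fd_ostream,
whose constructor at this revision reports failure through an out-parameter
string instead of a stream state bit. The idiom in isolation (the filename is
arbitrary):

    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    int main() {
      std::string ErrorInfo;
      raw_fd_ostream File("cfg.example.dot", ErrorInfo);
      if (!ErrorInfo.empty()) {   // an empty string means success
        errs() << "error opening file for writing!\n";
        return 1;
      }
      File << "digraph {}\n";
      return 0;
    }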
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 6f2a06c..1d2f118 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,28 +6,33 @@ add_llvm_library(LLVMAnalysis
AliasSetTracker.cpp
Analysis.cpp
BasicAliasAnalysis.cpp
- CaptureTracking.cpp
CFGPrinter.cpp
+ CaptureTracking.cpp
ConstantFolding.cpp
DbgInfoPrinter.cpp
DebugInfo.cpp
+ IVUsers.cpp
+ InlineCost.cpp
InstCount.cpp
Interval.cpp
IntervalPartition.cpp
- IVUsers.cpp
LibCallAliasAnalysis.cpp
LibCallSemantics.cpp
LiveValues.cpp
LoopDependenceAnalysis.cpp
LoopInfo.cpp
LoopPass.cpp
- LoopVR.cpp
+ MallocHelper.cpp
MemoryDependenceAnalysis.cpp
+ PointerTracking.cpp
PostDominators.cpp
+ ProfileEstimatorPass.cpp
ProfileInfo.cpp
ProfileInfoLoader.cpp
ProfileInfoLoaderPass.cpp
+ ProfileVerifierPass.cpp
ScalarEvolution.cpp
+ ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
SparsePropagation.cpp
Trace.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a19b8e4..b30ac71 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -54,7 +54,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
if (CS.onlyReadsMemory() && CS.doesNotThrow() &&
- I->getType() == Type::VoidTy)
+ I->getType() == Type::getVoidTy(V->getContext()))
break;
// Not captured if only passed via 'nocapture' arguments. Note that
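This one-line change is the pattern repeated throughout the commit: primitive
types are now owned by an LLVMContext, so the old Type::VoidTy/Int64Ty globals
give way to context-qualified lookups. A sketch of the new spelling (the
helper names are hypothetical):

    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Every primitive type query now names the context it belongs to.
    static const Type *voidTyOf(LLVMContext &C)  { return Type::getVoidTy(C); }
    static const Type *int64TyOf(LLVMContext &C) { return Type::getInt64Ty(C); }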
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5aa4d56..0ce1c24 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1,4 +1,4 @@
-//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===//
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This family of functions determines the possibility of performing constant
-// folding.
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
//
//===----------------------------------------------------------------------===//
@@ -19,9 +23,11 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include <cerrno>
@@ -92,7 +98,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
/// these together. If target data info is available, it is provided as TD,
/// otherwise TD is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const TargetData *TD){
+ Constant *Op1, const TargetData *TD,
+ LLVMContext &Context){
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
@@ -121,40 +128,103 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
const Type *ResultTy,
+ LLVMContext &Context,
const TargetData *TD) {
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
-
- uint64_t BasePtr = 0;
+
+ unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context));
+ APInt BasePtr(BitWidth, 0);
+ bool BaseIsInt = true;
if (!Ptr->isNullValue()) {
// If this is an inttoptr from a constant int, we can fold this as the base,
// otherwise we can't.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
if (CE->getOpcode() == Instruction::IntToPtr)
- if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
- BasePtr = Base->getZExtValue();
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ BasePtr = Base->getValue();
+ BasePtr.zextOrTrunc(BitWidth);
+ }
if (BasePtr == 0)
- return 0;
+ BaseIsInt = false;
}
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
for (unsigned i = 1; i != NumOps; ++i)
if (!isa<ConstantInt>(Ops[i]))
- return false;
+ return 0;
- uint64_t Offset = TD->getIndexedOffset(Ptr->getType(),
- (Value**)Ops+1, NumOps-1);
- Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr);
- return ConstantExpr::getIntToPtr(C, ResultTy);
+ APInt Offset = APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1));
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value casted to the pointer type.
+ if (BaseIsInt) {
+ Constant *C = ConstantInt::get(Context, Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ const Type *Ty = Ptr->getType();
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (isa<PointerType>(ATy) && !NewIdxs.empty())
+ break;
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ if (ElemSize == 0)
+ return 0;
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx));
+ Ty = ATy->getElementType();
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is at least as safe as the StructLayout API because we
+ // know the offset is within the struct at this point.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = ConstantExpr::getBitCast(C, ResultTy);
+
+ return C;
}
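
The descent above peels the flat byte offset apart: divide by each array element's allocation size to get an array index, pick the containing struct field and subtract its offset, and repeat until the offset reaches zero (foldable) or lands mid-member (bail out). A minimal standalone sketch of that arithmetic, using a hypothetical [4 x { i32, i32 }] layout instead of the real TargetData/APInt API:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Illustrative only: decompose a flat byte offset into GEP-style indices
    // for a hypothetical type [4 x { i32, i32 }] (element size 8 bytes,
    // field offsets 0 and 4).
    int main() {
      uint64_t Offset = 20;               // should resolve to element 2, field 1
      std::vector<uint64_t> Idxs;

      const uint64_t ElemSize = 8;        // alloc size of { i32, i32 }
      Idxs.push_back(Offset / ElemSize);  // array index (udiv): 2
      Offset -= Idxs.back() * ElemSize;   // remaining offset in element: 4

      const uint64_t FieldOffsets[2] = {0, 4};
      unsigned Fld = Offset >= FieldOffsets[1] ? 1 : 0;  // containing field
      Idxs.push_back(Fld);
      Offset -= FieldOffsets[Fld];

      // A nonzero leftover would point into the middle of an indivisible
      // member, which is exactly the case where the fold returns 0.
      assert(Offset == 0);

      printf("gep indices: %llu, %llu\n",
             (unsigned long long)Idxs[0], (unsigned long long)Idxs[1]);
      return 0;
    }
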
/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
/// targetdata. Return 0 if unfoldable.
static Constant *FoldBitCast(Constant *C, const Type *DestTy,
- const TargetData &TD) {
+ const TargetData &TD, LLVMContext &Context) {
// If this is a bitcast from constant vector -> vector, fold it.
if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
@@ -180,10 +250,10 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
if (DstEltTy->isFloatingPoint()) {
      // Fold to a vector of integers with the same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
- const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth),
- NumDstElt);
+ const Type *DestIVTy = VectorType::get(
+ IntegerType::get(Context, FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD);
+ C = FoldBitCast(C, DestIVTy, TD, Context);
if (!C) return 0;
// Finally, VMCore can handle this now that #elts line up.
@@ -194,8 +264,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// it to integer first.
if (SrcEltTy->isFloatingPoint()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth),
- NumSrcElt);
+ const Type *SrcIVTy = VectorType::get(
+ IntegerType::get(Context, FPWidth), NumSrcElt);
// Ask VMCore to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
CV = dyn_cast<ConstantVector>(C);
@@ -228,7 +298,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// Shift it to the right place, depending on endianness.
Src = ConstantExpr::getShl(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
// Mix it in.
@@ -251,7 +321,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// Shift the piece of the value into the right place, depending on
// endianness.
Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
+ ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
// Truncate and remember this piece.
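
The shift-and-mix loop above is the core of folding a vector bitcast through integers: each narrow element is shifted to its byte position and OR'd into the wide result, with the shift amount walking up on little-endian targets and down on big-endian ones. A self-contained sketch of the little-endian case:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: reassemble one i64 from two i32 elements the way
    // the fold above does, with element 0 landing in the low bits.
    int main() {
      const uint32_t Elts[2] = {0x11223344u, 0x55667788u};
      uint64_t Result = 0;
      unsigned ShiftAmt = 0;                      // little endian: walk up from 0
      for (unsigned i = 0; i != 2; ++i) {
        Result |= (uint64_t)Elts[i] << ShiftAmt;  // shift into place, mix it in
        ShiftAmt += 32;                           // big endian would subtract
      }
      assert(Result == 0x5566778811223344ULL);
      return 0;
    }
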
@@ -278,7 +348,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
+ const TargetData *TD) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
if (PN->getNumIncomingValues() == 0)
return UndefValue::get(PN->getType());
@@ -306,16 +377,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(),
- Ops.data(), Ops.size(), TD);
- else
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Ops.data(), Ops.size(), TD);
+ Ops.data(), Ops.size(),
+ Context, TD);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), Context, TD);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData.  If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+ LLVMContext &Context,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
@@ -323,10 +396,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(),
- Ops.data(), Ops.size(), TD);
- else
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
- Ops.data(), Ops.size(), TD);
+ Ops.data(), Ops.size(),
+ Context, TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), Context, TD);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -337,11 +410,13 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant* const* Ops, unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD,
+ Context))
return C;
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
@@ -356,9 +431,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
return 0;
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::VICmp:
- case Instruction::VFCmp:
- assert(0 &&"This function is invalid for compares: no predicate specified");
+ llvm_unreachable("This function is invalid for compares: no predicate specified");
case Instruction::PtrToInt:
  // If the input is an inttoptr, eliminate the pair.  This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -368,7 +441,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
- ConstantInt::get(APInt::getLowBitsSet(InWidth,
+ ConstantInt::get(Context, APInt::getLowBitsSet(InWidth,
TD->getPointerSizeInBits()));
Input = ConstantExpr::getAnd(Input, Mask);
}
@@ -387,7 +460,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
CE->getType()->getScalarSizeInBits()) {
if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *Input = CE->getOperand(0);
- Constant *C = FoldBitCast(Input, DestTy, *TD);
+ Constant *C = FoldBitCast(Input, DestTy, *TD, Context);
return C ? C : ConstantExpr::getBitCast(Input, DestTy);
}
// If there's a constant offset added to the integer value before
@@ -412,9 +485,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
AT->getNumElements()))) {
Constant *Index[] = {
Constant::getNullValue(CE->getType()),
- ConstantInt::get(ElemIdx)
+ ConstantInt::get(Context, ElemIdx)
};
- return ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
+ return
+ ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
}
}
}
@@ -434,7 +508,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
- if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD))
+ if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD, Context))
return C;
return ConstantExpr::getBitCast(Ops[0], DestTy);
case Instruction::Select:
@@ -446,7 +520,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
@@ -460,6 +534,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant*const * Ops,
unsigned NumOps,
+ LLVMContext &Context,
const TargetData *TD) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
@@ -470,14 +545,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD && Ops[1]->isNullValue()) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -487,13 +563,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = CE0->getOperand(0);
Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
// FIXME!
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
@@ -503,7 +580,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
Constant *NewOps[] = { C0, C1 };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -514,7 +592,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *NewOps[] = {
CE0->getOperand(0), CE1->getOperand(0)
};
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
+ Context, TD);
}
}
}
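
Each recursion above first normalizes the inttoptr operand to pointer width with an explicit zext or trunc, so that any truncation the cast implies is modeled instead of silently ignored. A tiny sketch of why that matters, assuming a 32-bit pointer:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: an i64 value that truncates to 0 at 32-bit pointer
    // width compares equal to null, even though the i64 itself is nonzero.
    int main() {
      const uint64_t V = 0x100000000ULL;        // nonzero as i64
      const uint32_t AtPtrWidth = (uint32_t)V;  // explicit trunc to intptr width
      assert(V != 0 && AtPtrWidth == 0);        // icmp eq (inttoptr V), null -> true
      return 0;
    }
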
@@ -597,74 +676,47 @@ llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
return true;
- default: break;
+ default:
+ return false;
+ case 0: break;
}
if (!F->hasName()) return false;
- const char *Str = F->getNameStart();
- unsigned Len = F->getNameLen();
+ StringRef Name = F->getName();
  // In these cases, checking the length is required: we don't want to
  // return true for a name like "cos\0blah", which strcmp would report as
  // equal to "cos" even though the name has length 8.
- switch (Str[0]) {
+ switch (Name[0]) {
default: return false;
case 'a':
- if (Len == 4)
- return !strcmp(Str, "acos") || !strcmp(Str, "asin") ||
- !strcmp(Str, "atan");
- else if (Len == 5)
- return !strcmp(Str, "atan2");
- return false;
+ return Name == "acos" || Name == "asin" ||
+ Name == "atan" || Name == "atan2";
case 'c':
- if (Len == 3)
- return !strcmp(Str, "cos");
- else if (Len == 4)
- return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") ||
- !strcmp(Str, "cosh");
- return false;
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
case 'e':
- if (Len == 3)
- return !strcmp(Str, "exp");
- return false;
+ return Name == "exp";
case 'f':
- if (Len == 4)
- return !strcmp(Str, "fabs") || !strcmp(Str, "fmod");
- else if (Len == 5)
- return !strcmp(Str, "floor");
- return false;
- break;
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
case 'l':
- if (Len == 3 && !strcmp(Str, "log"))
- return true;
- if (Len == 5 && !strcmp(Str, "log10"))
- return true;
- return false;
+ return Name == "log" || Name == "log10";
case 'p':
- if (Len == 3 && !strcmp(Str, "pow"))
- return true;
- return false;
+ return Name == "pow";
case 's':
- if (Len == 3)
- return !strcmp(Str, "sin");
- if (Len == 4)
- return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") ||
- !strcmp(Str, "sinf");
- if (Len == 5)
- return !strcmp(Str, "sqrtf");
- return false;
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
case 't':
- if (Len == 3 && !strcmp(Str, "tan"))
- return true;
- else if (Len == 4 && !strcmp(Str, "tanh"))
- return true;
- return false;
+ return Name == "tan" || Name == "tanh";
}
}
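
The rewritten predicate keeps the first-character switch as a cheap dispatch, then relies on StringRef's length-aware equality instead of strcmp plus manual length checks. The same pattern in portable C++, with std::string_view standing in for StringRef:

    #include <cassert>
    #include <string_view>

    // Illustrative only: switch on the first character, then compare full
    // names. Unlike strcmp on a raw pointer, string_view equality also
    // compares length, so "cos\0blah" (length 8) cannot match "cos".
    static bool isFoldableName(std::string_view Name) {
      if (Name.empty()) return false;
      switch (Name[0]) {
      default:  return false;
      case 'c': return Name == "cos" || Name == "ceil" ||
                       Name == "cosf" || Name == "cosh";
      case 't': return Name == "tan" || Name == "tanh";
      }
    }

    int main() {
      assert(isFoldableName("cosh"));
      assert(!isFoldableName(std::string_view("cos\0blah", 8)));
      return 0;
    }
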
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
- const Type *Ty) {
+ const Type *Ty, LLVMContext &Context) {
errno = 0;
V = NativeFP(V);
if (errno != 0) {
@@ -672,17 +724,18 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
return 0;
}
- if (Ty == Type::FloatTy)
- return ConstantFP::get(APFloat((float)V));
- if (Ty == Type::DoubleTy)
- return ConstantFP::get(APFloat(V));
- assert(0 && "Can only constant fold float/double");
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Context, APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Context, APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
double V, double W,
- const Type *Ty) {
+ const Type *Ty,
+ LLVMContext &Context) {
errno = 0;
V = NativeFP(V, W);
if (errno != 0) {
@@ -690,137 +743,195 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return 0;
}
- if (Ty == Type::FloatTy)
- return ConstantFP::get(APFloat((float)V));
- if (Ty == Type::DoubleTy)
- return ConstantFP::get(APFloat(V));
- assert(0 && "Can only constant fold float/double");
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Context, APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Context, APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
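
ConstantFoldFP and ConstantFoldBinaryFP both wrap the host libm call in an errno check and refuse to fold when the host reports a domain or range error. A stripped-down sketch of that guard (it assumes the host sets errno for math errors, as glibc-style math_errhandling does):

    #include <cassert>
    #include <cerrno>
    #include <cmath>

    // Illustrative only: fold through the host's double-precision libm, but
    // give up (return false) if the call reported an error via errno.
    static bool tryFoldFP(double (*NativeFP)(double), double V, double &Result) {
      errno = 0;
      Result = NativeFP(V);
      return errno == 0;
    }

    int main() {
      double R;
      assert(tryFoldFP(static_cast<double (*)(double)>(std::log), 2.0, R));
      // log(-1) sets EDOM on typical hosts, so the constant is left unfolded.
      assert(!tryFoldFP(static_cast<double (*)(double)>(std::log), -1.0, R));
      return 0;
    }
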
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
-
Constant *
llvm::ConstantFoldCall(Function *F,
- Constant* const* Operands, unsigned NumOperands) {
+ Constant *const *Operands, unsigned NumOperands) {
if (!F->hasName()) return 0;
- const char *Str = F->getNameStart();
- unsigned Len = F->getNameLen();
-
+ LLVMContext &Context = F->getContext();
+ StringRef Name = F->getName();
+
const Type *Ty = F->getReturnType();
if (NumOperands == 1) {
if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
- if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
      /// Currently APFloat versions of these functions do not exist, so we use
      /// the host native double versions.  Float versions are not called
      /// directly, but for all of these functions it holds that
      /// (float)(f((double)arg)) == f(arg).  Long double is not supported yet.
- double V = Ty==Type::FloatTy ? (double)Op->getValueAPF().convertToFloat():
+ double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
Op->getValueAPF().convertToDouble();
- switch (Str[0]) {
+ switch (Name[0]) {
case 'a':
- if (Len == 4 && !strcmp(Str, "acos"))
- return ConstantFoldFP(acos, V, Ty);
- else if (Len == 4 && !strcmp(Str, "asin"))
- return ConstantFoldFP(asin, V, Ty);
- else if (Len == 4 && !strcmp(Str, "atan"))
- return ConstantFoldFP(atan, V, Ty);
+ if (Name == "acos")
+ return ConstantFoldFP(acos, V, Ty, Context);
+ else if (Name == "asin")
+ return ConstantFoldFP(asin, V, Ty, Context);
+ else if (Name == "atan")
+ return ConstantFoldFP(atan, V, Ty, Context);
break;
case 'c':
- if (Len == 4 && !strcmp(Str, "ceil"))
- return ConstantFoldFP(ceil, V, Ty);
- else if (Len == 3 && !strcmp(Str, "cos"))
- return ConstantFoldFP(cos, V, Ty);
- else if (Len == 4 && !strcmp(Str, "cosh"))
- return ConstantFoldFP(cosh, V, Ty);
- else if (Len == 4 && !strcmp(Str, "cosf"))
- return ConstantFoldFP(cos, V, Ty);
+ if (Name == "ceil")
+ return ConstantFoldFP(ceil, V, Ty, Context);
+ else if (Name == "cos")
+ return ConstantFoldFP(cos, V, Ty, Context);
+ else if (Name == "cosh")
+ return ConstantFoldFP(cosh, V, Ty, Context);
+ else if (Name == "cosf")
+ return ConstantFoldFP(cos, V, Ty, Context);
break;
case 'e':
- if (Len == 3 && !strcmp(Str, "exp"))
- return ConstantFoldFP(exp, V, Ty);
+ if (Name == "exp")
+ return ConstantFoldFP(exp, V, Ty, Context);
break;
case 'f':
- if (Len == 4 && !strcmp(Str, "fabs"))
- return ConstantFoldFP(fabs, V, Ty);
- else if (Len == 5 && !strcmp(Str, "floor"))
- return ConstantFoldFP(floor, V, Ty);
+ if (Name == "fabs")
+ return ConstantFoldFP(fabs, V, Ty, Context);
+ else if (Name == "floor")
+ return ConstantFoldFP(floor, V, Ty, Context);
break;
case 'l':
- if (Len == 3 && !strcmp(Str, "log") && V > 0)
- return ConstantFoldFP(log, V, Ty);
- else if (Len == 5 && !strcmp(Str, "log10") && V > 0)
- return ConstantFoldFP(log10, V, Ty);
- else if (!strcmp(Str, "llvm.sqrt.f32") ||
- !strcmp(Str, "llvm.sqrt.f64")) {
+ if (Name == "log" && V > 0)
+ return ConstantFoldFP(log, V, Ty, Context);
+ else if (Name == "log10" && V > 0)
+ return ConstantFoldFP(log10, V, Ty, Context);
+ else if (Name == "llvm.sqrt.f32" ||
+ Name == "llvm.sqrt.f64") {
if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty);
+ return ConstantFoldFP(sqrt, V, Ty, Context);
else // Undefined
return Constant::getNullValue(Ty);
}
break;
case 's':
- if (Len == 3 && !strcmp(Str, "sin"))
- return ConstantFoldFP(sin, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sinh"))
- return ConstantFoldFP(sinh, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Len == 5 && !strcmp(Str, "sqrtf") && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Len == 4 && !strcmp(Str, "sinf"))
- return ConstantFoldFP(sin, V, Ty);
+ if (Name == "sin")
+ return ConstantFoldFP(sin, V, Ty, Context);
+ else if (Name == "sinh")
+ return ConstantFoldFP(sinh, V, Ty, Context);
+ else if (Name == "sqrt" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty, Context);
+ else if (Name == "sqrtf" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty, Context);
+ else if (Name == "sinf")
+ return ConstantFoldFP(sin, V, Ty, Context);
break;
case 't':
- if (Len == 3 && !strcmp(Str, "tan"))
- return ConstantFoldFP(tan, V, Ty);
- else if (Len == 4 && !strcmp(Str, "tanh"))
- return ConstantFoldFP(tanh, V, Ty);
+ if (Name == "tan")
+ return ConstantFoldFP(tan, V, Ty, Context);
+ else if (Name == "tanh")
+ return ConstantFoldFP(tanh, V, Ty, Context);
break;
default:
break;
}
- } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
- if (Len > 11 && !memcmp(Str, "llvm.bswap", 10))
- return ConstantInt::get(Op->getValue().byteSwap());
- else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10))
+ return 0;
+ }
+
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ if (Name.startswith("llvm.bswap"))
+ return ConstantInt::get(Context, Op->getValue().byteSwap());
+ else if (Name.startswith("llvm.ctpop"))
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9))
+ else if (Name.startswith("llvm.cttz"))
return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9))
+ else if (Name.startswith("llvm.ctlz"))
return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ return 0;
}
- } else if (NumOperands == 2) {
+
+ return 0;
+ }
+
+ if (NumOperands == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
- double Op1V = Ty==Type::FloatTy ?
- (double)Op1->getValueAPF().convertToFloat():
+ double Op1V = Ty->isFloatTy() ?
+ (double)Op1->getValueAPF().convertToFloat() :
Op1->getValueAPF().convertToDouble();
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
- double Op2V = Ty==Type::FloatTy ?
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Len == 3 && !strcmp(Str, "pow")) {
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- } else if (Len == 4 && !strcmp(Str, "fmod")) {
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- } else if (Len == 5 && !strcmp(Str, "atan2")) {
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
- }
+ if (Name == "pow")
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context);
+ if (Name == "fmod")
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context);
+ if (Name == "atan2")
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
- if (!strcmp(Str, "llvm.powi.f32")) {
- return ConstantFP::get(APFloat((float)std::pow((float)Op1V,
+ if (Name == "llvm.powi.f32")
+ return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V,
(int)Op2C->getZExtValue())));
- } else if (!strcmp(Str, "llvm.powi.f64")) {
- return ConstantFP::get(APFloat((double)std::pow((double)Op1V,
+ if (Name == "llvm.powi.f64")
+ return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V,
(int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow: {
+ Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
+ Constant *Ops[] = {
+ Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow.
+ };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::usub_with_overflow: {
+ Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
+ Constant *Ops[] = {
+ Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow.
+ };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::sadd_with_overflow: {
+ Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
+ Constant *Overflow = ConstantExpr::getSelect(
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT,
+ ConstantInt::get(Op1->getType(), 0), Op1),
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2),
+ ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow.
+
+ Constant *Ops[] = { Res, Overflow };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
+ case Intrinsic::ssub_with_overflow: {
+ Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
+ Constant *Overflow = ConstantExpr::getSelect(
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT,
+ ConstantInt::get(Op2->getType(), 0), Op2),
+ ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1),
+ ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow.
+
+ Constant *Ops[] = { Res, Overflow };
+ return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ }
}
}
+
+ return 0;
}
+ return 0;
}
return 0;
}
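
The sadd_with_overflow fold above computes the overflow bit with a select: when the first operand is negative, the wrapped sum overflows iff it is greater than the second operand; otherwise iff it is smaller. The same predicate checked on concrete 32-bit values:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: the signed-add overflow predicate folded above,
    // i.e. overflow = select(0 > a, res > b, res < b), res wrapping mod 2^32.
    static bool saddOverflows(int32_t A, int32_t B) {
      int32_t Res = (int32_t)((uint32_t)A + (uint32_t)B);  // two's-complement wrap
      return A < 0 ? Res > B : Res < B;
    }

    int main() {
      assert(saddOverflows(INT32_MAX, 1));   // wraps negative
      assert(saddOverflows(INT32_MIN, -1));  // wraps positive
      assert(!saddOverflows(100, -200));     // in range
      return 0;
    }
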
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 6c549e63..2bbe2e0 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -90,10 +90,9 @@ void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
}
void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
- DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
- std::string Res1, Res2;
- Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
- << " return type: " << Subprogram.getReturnTypeName(Res2)
+ DISubprogram Subprogram(FS->getSubprogram());
+ Out << "; fully qualified function name: " << Subprogram.getDisplayName()
+ << " return type: " << Subprogram.getReturnTypeName()
<< " at line " << Subprogram.getLineNumber()
<< "\n\n";
}
@@ -152,7 +151,7 @@ bool PrintDbgInfo::runOnFunction(Function &F) {
Printed = true;
}
- Out << *i;
+ Out << *i << '\n';
printVariableDeclaration(i);
if (const User *U = dyn_cast<User>(i)) {
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 9eecc33..7bb7e9b 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -18,12 +18,13 @@
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/Streams.h"
-
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::dwarf;
@@ -32,18 +33,12 @@ using namespace llvm::dwarf;
//===----------------------------------------------------------------------===//
/// ValidDebugInfo - Return true if V represents valid debug info value.
-bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
- if (!V)
- return false;
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V->stripPointerCasts());
- if (!GV)
- return false;
-
- if (!GV->hasInternalLinkage () && !GV->hasLinkOnceLinkage())
+/// FIXME: Add DIDescriptor.isValid()
+bool DIDescriptor::ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel) {
+ if (!N)
return false;
- DIDescriptor DI(GV);
+ DIDescriptor DI(N);
// Check current version. Allow Version6 for now.
unsigned Version = DI.getVersion();
@@ -53,13 +48,13 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
unsigned Tag = DI.getTag();
switch (Tag) {
case DW_TAG_variable:
- assert(DIVariable(GV).Verify() && "Invalid DebugInfo value");
+ assert(DIVariable(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_compile_unit:
- assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value");
+ assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_subprogram:
- assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value");
+ assert(DISubprogram(N).Verify() && "Invalid DebugInfo value");
break;
case DW_TAG_lexical_block:
    // FIXME: This interferes with the quality of generated code during
@@ -74,84 +69,75 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
return true;
}
-DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) {
- DbgGV = GV;
-
- // If this is non-null, check to see if the Tag matches. If not, set to null.
- if (GV && getTag() != RequiredTag)
- DbgGV = 0;
-}
+DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) {
+ DbgNode = N;
-const std::string &
-DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
- if (DbgGV == 0) {
- Result.clear();
- return Result;
+ // If this is non-null, check to see if the Tag matches. If not, set to null.
+ if (N && getTag() != RequiredTag) {
+ DbgNode = 0;
}
+}
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands()) {
- Result.clear();
- return Result;
- }
+const char *
+DIDescriptor::getStringField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return NULL;
- // Fills in the string if it succeeds
- if (!GetConstantStringInfo(C->getOperand(Elt), Result))
- Result.clear();
+ if (Elt < DbgNode->getNumElements())
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt)))
+ return MDS->getString().data();
- return Result;
+ return NULL;
}
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (DbgGV == 0) return 0;
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C->getOperand(Elt)))
- return CI->getZExtValue();
+ if (Elt < DbgNode->getNumElements())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getElement(Elt)))
+ return CI->getZExtValue();
+
return 0;
}
DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
- if (DbgGV == 0) return DIDescriptor();
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return DIDescriptor();
- C = C->getOperand(Elt);
- return DIDescriptor(dyn_cast<GlobalVariable>(C->stripPointerCasts()));
+ if (Elt < DbgNode->getNumElements() && DbgNode->getElement(Elt))
+ return DIDescriptor(dyn_cast<MDNode>(DbgNode->getElement(Elt)));
+
+ return DIDescriptor();
}
GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- if (DbgGV == 0) return 0;
-
- Constant *C = DbgGV->getInitializer();
- if (C == 0 || Elt >= C->getNumOperands())
+ if (DbgNode == 0)
return 0;
- C = C->getOperand(Elt);
- return dyn_cast<GlobalVariable>(C->stripPointerCasts());
+ if (Elt < DbgNode->getNumElements())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getElement(Elt));
+ return 0;
}
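
The rewritten accessors all share one shape: bounds-check the element index, type-check the element, and fail soft (null or 0) rather than assert, since malformed debug info must not crash the compiler. A toy model of that shape, with std::variant standing in for the MDNode element kinds:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <variant>
    #include <vector>

    // Illustrative only: bounds- and type-checked field access that fails
    // soft, mirroring getStringField/getUInt64Field above.
    using Field = std::variant<std::monostate, std::string, uint64_t>;

    struct ToyNode {
      std::vector<Field> Elements;

      const char *getStringField(unsigned Elt) const {
        if (Elt < Elements.size())
          if (const std::string *S = std::get_if<std::string>(&Elements[Elt]))
            return S->c_str();
        return nullptr;  // out of range or wrong kind
      }

      uint64_t getUInt64Field(unsigned Elt) const {
        if (Elt < Elements.size())
          if (const uint64_t *V = std::get_if<uint64_t>(&Elements[Elt]))
            return *V;
        return 0;
      }
    };

    int main() {
      ToyNode N{{Field{std::string("foo.c")}, Field{uint64_t(42)}}};
      printf("%s %llu %llu\n", N.getStringField(0),
             (unsigned long long)N.getUInt64Field(1),
             (unsigned long long)N.getUInt64Field(5));  // prints: foo.c 42 0
      return 0;
    }
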
//===----------------------------------------------------------------------===//
-// Simple Descriptor Constructors and other Methods
+// Predicates
//===----------------------------------------------------------------------===//
-// Needed by DIVariable::getType().
-DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) {
- if (!GV) return;
- unsigned tag = getTag();
- if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
- !DICompositeType::isCompositeType(tag))
- DbgGV = 0;
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_base_type;
}
-/// isDerivedType - Return true if the specified tag is legal for
-/// DIDerivedType.
-bool DIType::isDerivedType(unsigned Tag) {
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
switch (Tag) {
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_pointer_type:
@@ -163,16 +149,18 @@ bool DIType::isDerivedType(unsigned Tag) {
case dwarf::DW_TAG_inheritance:
return true;
default:
- // FIXME: Even though it doesn't make sense, CompositeTypes are current
- // modelled as DerivedTypes, this should return true for them as well.
- return false;
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
}
}
/// isCompositeType - Return true if the specified tag is legal for
/// DICompositeType.
-bool DIType::isCompositeType(unsigned TAG) {
- switch (TAG) {
+bool DIDescriptor::isCompositeType() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ switch (Tag) {
case dwarf::DW_TAG_array_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
@@ -187,7 +175,10 @@ bool DIType::isCompositeType(unsigned TAG) {
}
/// isVariable - Return true if the specified tag is legal for DIVariable.
-bool DIVariable::isVariable(unsigned Tag) {
+bool DIDescriptor::isVariable() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
switch (Tag) {
case dwarf::DW_TAG_auto_variable:
case dwarf::DW_TAG_arg_variable:
@@ -198,19 +189,126 @@ bool DIVariable::isVariable(unsigned Tag) {
}
}
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_variable;
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tag.
+bool DIDescriptor::isScope() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ switch (Tag) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_compile_unit;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ assert (!isNull() && "Invalid descriptor!");
+ unsigned Tag = getTag();
+
+ return Tag == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(MDNode *N) : DIDescriptor(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
unsigned DIArray::getNumElements() const {
- assert (DbgGV && "Invalid DIArray");
- Constant *C = DbgGV->getInitializer();
- assert (C && "Invalid DIArray initializer");
- return C->getNumOperands();
+ assert (DbgNode && "Invalid DIArray");
+ return DbgNode->getNumElements();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor. After this completes, the current debug info value
+/// is erased.
+void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
+ if (isNull())
+ return;
+
+ assert (!D.isNull() && "Can not replace with null");
+
+  // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (getNode() != D.getNode()) {
+ MDNode *Node = DbgNode;
+ Node->replaceAllUsesWith(D.getNode());
+ delete Node;
+ }
}
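
The guard above matters because uniqued nodes can merge: a client may legitimately end up asking to replace a node with itself, and performing the replacement (then deleting the old node) would destroy the replacement too. In miniature:

    #include <cassert>

    // Illustrative only: self-replacement must be a no-op, otherwise the
    // delete below would also destroy the replacement node.
    struct Node { int Id; };

    static void replaceAllUsesWith(Node *&Use, Node *Old, Node *New) {
      if (Old == New)
        return;        // merged by uniquing: nothing to do, nothing to delete
      if (Use == Old)
        Use = New;     // rewrite the single "use" in this toy model
      delete Old;
    }

    int main() {
      Node *A = new Node{1};
      Node *B = new Node{2};
      Node *Use = A;
      replaceAllUsesWith(Use, A, A);  // safe self-replacement
      replaceAllUsesWith(Use, A, B);  // real replacement; A is deleted
      assert(Use->Id == 2);
      delete B;
      return 0;
    }
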
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- std::string Res;
- if (getFilename(Res).empty())
+ const char *N = getFilename();
+ if (!N)
return false;
  // It is possible that the directory and producer strings are empty.
return true;
@@ -218,26 +316,26 @@ bool DICompileUnit::Verify() const {
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- if (getContext().isNull())
+ if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
return true;
}
/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
- if (isNull())
+ if (isNull())
return false;
- if (getContext().isNull())
+ if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
return true;
}
@@ -246,12 +344,12 @@ bool DICompositeType::Verify() const {
bool DISubprogram::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ if (!CU.Verify())
return false;
DICompositeType Ty = getType();
@@ -264,12 +362,12 @@ bool DISubprogram::Verify() const {
bool DIGlobalVariable::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
DICompileUnit CU = getCompileUnit();
- if (!CU.isNull() && !CU.Verify())
+ if (!CU.isNull() && !CU.Verify())
return false;
DIType Ty = getType();
@@ -286,7 +384,7 @@ bool DIGlobalVariable::Verify() const {
bool DIVariable::Verify() const {
if (isNull())
return false;
-
+
if (getContext().isNull())
return false;
@@ -312,15 +410,38 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
/// information for the function F.
bool DISubprogram::describes(const Function *F) {
assert (F && "Invalid function");
- std::string Name;
- getLinkageName(Name);
- if (Name.empty())
- getName(Name);
- if (!Name.empty() && (strcmp(Name.c_str(), F->getNameStart()) == false))
+ const char *Name = getLinkageName();
+ if (!Name)
+ Name = getName();
+ if (strcmp(F->getName().data(), Name) == 0)
return true;
return false;
}
+const char *DIScope::getFilename() const {
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getFilename();
+ else if (isSubprogram())
+ return DISubprogram(DbgNode).getFilename();
+ else if (isCompileUnit())
+ return DICompileUnit(DbgNode).getFilename();
+ else
+ assert (0 && "Invalid DIScope!");
+ return NULL;
+}
+
+const char *DIScope::getDirectory() const {
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getDirectory();
+ else if (isSubprogram())
+ return DISubprogram(DbgNode).getDirectory();
+ else if (isCompileUnit())
+ return DICompileUnit(DbgNode).getDirectory();
+ else
+ assert (0 && "Invalid DIScope!");
+ return NULL;
+}
+
//===----------------------------------------------------------------------===//
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
@@ -328,69 +449,67 @@ bool DISubprogram::describes(const Function *F) {
/// dump - Print descriptor.
void DIDescriptor::dump() const {
- cerr << "[" << dwarf::TagString(getTag()) << "] ";
- cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec;
+ errs() << "[" << dwarf::TagString(getTag()) << "] ";
+ errs().write_hex((intptr_t) &*DbgNode) << ']';
}
/// dump - Print compile unit.
void DICompileUnit::dump() const {
if (getLanguage())
- cerr << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+ errs() << " [" << dwarf::LanguageString(getLanguage()) << "] ";
- std::string Res1, Res2;
- cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]";
+ errs() << " [" << getDirectory() << "/" << getFilename() << " ]";
}
/// dump - Print type.
void DIType::dump() const {
if (isNull()) return;
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
unsigned Tag = getTag();
- cerr << " [" << dwarf::TagString(Tag) << "] ";
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
getCompileUnit().dump();
- cerr << " ["
- << getLineNumber() << ", "
- << getSizeInBits() << ", "
- << getAlignInBits() << ", "
- << getOffsetInBits()
- << "] ";
-
- if (isPrivate())
- cerr << " [private] ";
+ errs() << " ["
+ << getLineNumber() << ", "
+ << getSizeInBits() << ", "
+ << getAlignInBits() << ", "
+ << getOffsetInBits()
+ << "] ";
+
+ if (isPrivate())
+ errs() << " [private] ";
else if (isProtected())
- cerr << " [protected] ";
+ errs() << " [protected] ";
if (isForwardDecl())
- cerr << " [fwd] ";
-
- if (isBasicType(Tag))
- DIBasicType(DbgGV).dump();
- else if (isDerivedType(Tag))
- DIDerivedType(DbgGV).dump();
- else if (isCompositeType(Tag))
- DICompositeType(DbgGV).dump();
+ errs() << " [fwd] ";
+
+ if (isBasicType())
+ DIBasicType(DbgNode).dump();
+ else if (isDerivedType())
+ DIDerivedType(DbgNode).dump();
+ else if (isCompositeType())
+ DICompositeType(DbgNode).dump();
else {
- cerr << "Invalid DIType\n";
+ errs() << "Invalid DIType\n";
return;
}
- cerr << "\n";
+ errs() << "\n";
}
/// dump - Print basic type.
void DIBasicType::dump() const {
- cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+ errs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
}
/// dump - Print derived type.
void DIDerivedType::dump() const {
- cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+ errs() << "\n\t Derived From: "; getTypeDerivedFrom().dump();
}
/// dump - Print composite type.
@@ -398,54 +517,72 @@ void DICompositeType::dump() const {
DIArray A = getTypeArray();
if (A.isNull())
return;
- cerr << " [" << A.getNumElements() << " elements]";
+ errs() << " [" << A.getNumElements() << " elements]";
}
/// dump - Print global.
void DIGlobal::dump() const {
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
unsigned Tag = getTag();
- cerr << " [" << dwarf::TagString(Tag) << "] ";
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
getCompileUnit().dump();
- cerr << " [" << getLineNumber() << "] ";
+ errs() << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
- cerr << " [local] ";
+ errs() << " [local] ";
if (isDefinition())
- cerr << " [def] ";
+ errs() << " [def] ";
- if (isGlobalVariable(Tag))
- DIGlobalVariable(DbgGV).dump();
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).dump();
- cerr << "\n";
+ errs() << "\n";
}
/// dump - Print subprogram.
void DISubprogram::dump() const {
- DIGlobal::dump();
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ errs() << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().dump();
+ errs() << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ errs() << " [local] ";
+
+ if (isDefinition())
+ errs() << " [def] ";
+
+ errs() << "\n";
}
/// dump - Print global variable.
void DIGlobalVariable::dump() const {
- cerr << " ["; getGlobal()->dump(); cerr << "] ";
+ errs() << " [";
+ getGlobal()->dump();
+ errs() << "] ";
}
/// dump - Print variable.
void DIVariable::dump() const {
- std::string Res;
- if (!getName(Res).empty())
- cerr << " [" << Res << "] ";
+ if (const char *Res = getName())
+ errs() << " [" << Res << "] ";
getCompileUnit().dump();
- cerr << " [" << getLineNumber() << "] ";
+ errs() << " [" << getLineNumber() << "] ";
getType().dump();
- cerr << "\n";
+ errs() << "\n";
+
+ // FIXME: Dump complex addresses
}
//===----------------------------------------------------------------------===//
@@ -453,98 +590,46 @@ void DIVariable::dump() const {
//===----------------------------------------------------------------------===//
DIFactory::DIFactory(Module &m)
- : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0),
+ : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0),
+ RegionStartFn(0), RegionEndFn(0),
DeclareFn(0) {
- EmptyStructPtr = PointerType::getUnqual(StructType::get());
-}
-
-/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'.
-/// This is only valid when the descriptor is non-null.
-Constant *DIFactory::getCastToEmpty(DIDescriptor D) {
- if (D.isNull()) return Constant::getNullValue(EmptyStructPtr);
- return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr);
+ EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
}
Constant *DIFactory::GetTagConstant(unsigned TAG) {
assert((TAG & LLVMDebugVersionMask) == 0 &&
"Tag too large for debug encoding!");
- return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion);
-}
-
-Constant *DIFactory::GetStringConstant(const std::string &String) {
- // Check string cache for previous edition.
- Constant *&Slot = StringCache[String];
-
- // Return Constant if previously defined.
- if (Slot) return Slot;
-
- const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
-
- // If empty string then use a i8* null instead.
- if (String.empty())
- return Slot = ConstantPointerNull::get(DestTy);
-
- // Construct string as an llvm constant.
- Constant *ConstStr = ConstantArray::get(String);
-
- // Otherwise create and return a new string global.
- GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
- GlobalVariable::InternalLinkage,
- ConstStr, ".str", &M);
- StrGV->setSection("llvm.metadata");
- return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
+ return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion);
}
//===----------------------------------------------------------------------===//
// DIFactory: Primary Constructors
//===----------------------------------------------------------------------===//
-/// GetOrCreateArray - Create an descriptor for an array of descriptors.
+/// GetOrCreateArray - Create a descriptor for an array of descriptors.
/// This implicitly uniques the arrays created.
DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
- SmallVector<Constant*, 16> Elts;
-
- for (unsigned i = 0; i != NumTys; ++i)
- Elts.push_back(getCastToEmpty(Tys[i]));
-
- Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr,
- Elts.size()),
- Elts.data(), Elts.size());
- // If we already have this array, just return the uniqued version.
- DIDescriptor &Entry = SimpleConstantCache[Init];
- if (!Entry.isNull()) return DIArray(Entry.getGV());
-
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.array", &M);
- GV->setSection("llvm.metadata");
- Entry = DIDescriptor(GV);
- return DIArray(GV);
+ SmallVector<Value*, 16> Elts;
+
+ if (NumTys == 0)
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ else
+ for (unsigned i = 0; i != NumTys; ++i)
+ Elts.push_back(Tys[i].getNode());
+
+ return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
}
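
Note that GetOrCreateArray never produces an element-free node: a zero-length descriptor array is encoded with a single null placeholder element. The same convention in miniature:

    #include <cassert>
    #include <vector>

    // Illustrative only: nullptr stands in for the null i32 constant used
    // as the placeholder element above.
    int main() {
      const unsigned NumTys = 0;
      std::vector<const void *> Elts;
      if (NumTys == 0)
        Elts.push_back(nullptr);  // placeholder so the node has one element
      assert(Elts.size() == 1);
      return 0;
    }
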
/// GetOrCreateSubrange - Create a descriptor for a value range. This
/// implicitly uniques the values returned.
DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subrange_type),
- ConstantInt::get(Type::Int64Ty, Lo),
- ConstantInt::get(Type::Int64Ty, Hi)
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- // If we already have this range, just return the uniqued version.
- DIDescriptor &Entry = SimpleConstantCache[Init];
- if (!Entry.isNull()) return DISubrange(Entry.getGV());
-
- M.addTypeName("llvm.dbg.subrange.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.subrange", &M);
- GV->setSection("llvm.metadata");
- Entry = DIDescriptor(GV);
- return DISubrange(GV);
+ return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
}
@@ -552,92 +637,69 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
- const std::string &Filename,
- const std::string &Directory,
- const std::string &Producer,
+ StringRef Filename,
+ StringRef Directory,
+ StringRef Producer,
bool isMain,
bool isOptimized,
const char *Flags,
unsigned RunTimeVer) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_compile_unit),
- Constant::getNullValue(EmptyStructPtr),
- ConstantInt::get(Type::Int32Ty, LangID),
- GetStringConstant(Filename),
- GetStringConstant(Directory),
- GetStringConstant(Producer),
- ConstantInt::get(Type::Int1Ty, isMain),
- ConstantInt::get(Type::Int1Ty, isOptimized),
- GetStringConstant(Flags),
- ConstantInt::get(Type::Int32Ty, RunTimeVer)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LangID),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isMain),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.compile_unit.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.compile_unit", &M);
- GV->setSection("llvm.metadata");
- return DICompileUnit(GV);
+
+ return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){
- Constant *Elts[] = {
+DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_enumerator),
- GetStringConstant(Name),
- ConstantInt::get(Type::Int64Ty, Val)
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.enumerator.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.enumerator", &M);
- GV->setSection("llvm.metadata");
- return DIEnumerator(GV);
+ return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3));
}
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
unsigned Encoding) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_base_type),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- ConstantInt::get(Type::Int32Ty, Encoding)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.basictype.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.basictype", &M);
- GV->setSection("llvm.metadata");
- return DIBasicType(GV);
+ return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateDerivedType - Create a derived type like const qualified type,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -645,33 +707,25 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
uint64_t OffsetInBits,
unsigned Flags,
DIType DerivedFrom) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- getCastToEmpty(DerivedFrom)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.derivedtype.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.derivedtype", &M);
- GV->setSection("llvm.metadata");
- return DIDerivedType(GV);
+ return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
}
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -682,143 +736,143 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIArray Elements,
unsigned RuntimeLang) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNumber),
- ConstantInt::get(Type::Int64Ty, SizeInBits),
- ConstantInt::get(Type::Int64Ty, AlignInBits),
- ConstantInt::get(Type::Int64Ty, OffsetInBits),
- ConstantInt::get(Type::Int32Ty, Flags),
- getCastToEmpty(DerivedFrom),
- getCastToEmpty(Elements),
- ConstantInt::get(Type::Int32Ty, RuntimeLang)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom.getNode(),
+ Elements.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.composite.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.composite", &M);
- GV->setSection("llvm.metadata");
- return DICompositeType(GV);
+ return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
}
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
- const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
+ StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
bool isDefinition) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
- Constant::getNullValue(EmptyStructPtr),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- GetStringConstant(DisplayName),
- GetStringConstant(LinkageName),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type),
- ConstantInt::get(Type::Int1Ty, isLocalToUnit),
- ConstantInt::get(Type::Int1Ty, isDefinition)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition)
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.subprogram.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.subprogram", &M);
- GV->setSection("llvm.metadata");
- return DISubprogram(GV);
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 11));
}
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
- const std::string &DisplayName,
- const std::string &LinkageName,
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
                                unsigned LineNo, DIType Type, bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *Val) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_variable),
- Constant::getNullValue(EmptyStructPtr),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- GetStringConstant(DisplayName),
- GetStringConstant(LinkageName),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type),
- ConstantInt::get(Type::Int1Ty, isLocalToUnit),
- ConstantInt::get(Type::Int1Ty, isDefinition),
- ConstantExpr::getBitCast(Val, EmptyStructPtr)
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ Val
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.global_variable.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::LinkOnceAnyLinkage,
- Init, "llvm.dbg.global_variable", &M);
- GV->setSection("llvm.metadata");
- return DIGlobalVariable(GV);
+
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 12);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addElement(Node);
+
+ return DIGlobalVariable(Node);
}
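
Unlike the old global variables, an MDNode has no use list that keeps it reachable, so the llvm.dbg.gv named metadata above is the anchor. A sketch of walking it back out, using only the accessors this patch itself relies on:

    static void collectDbgGVs(Module &M, SmallVectorImpl<MDNode*> &Out) {
      NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
      if (!NMD)
        return;
      for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i)
        if (MDNode *N = dyn_cast_or_null<MDNode>(NMD->getElement(i)))
          Out.push_back(N);  // wrap in DIGlobalVariable(N) to inspect fields
    }
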
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
+ StringRef Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type) {
- Constant *Elts[] = {
+ Value *Elts[] = {
GetTagConstant(Tag),
- getCastToEmpty(Context),
- GetStringConstant(Name),
- getCastToEmpty(CompileUnit),
- ConstantInt::get(Type::Int32Ty, LineNo),
- getCastToEmpty(Type)
+ Context.getNode(),
+ MDString::get(VMContext, Name),
+ CompileUnit.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Type.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.variable.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.variable", &M);
- GV->setSection("llvm.metadata");
- return DIVariable(GV);
+ return DIVariable(MDNode::get(VMContext, &Elts[0], 6));
+}
+
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNo,
+ DIType Type, SmallVector<Value *, 9> &addr) {
+ SmallVector<Value *, 9> Elts;
+ Elts.push_back(GetTagConstant(Tag));
+ Elts.push_back(Context.getNode());
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(CompileUnit.getNode());
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+ Elts.push_back(Type.getNode());
+ Elts.insert(Elts.end(), addr.begin(), addr.end());
+
+ return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
}
/// CreateBlock - This creates a descriptor for a lexical block with the
-/// specified parent context.
-DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
- Constant *Elts[] = {
+/// specified parent context.
+DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) {
+ Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
- getCastToEmpty(Context)
+ Context.getNode()
+ };
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2));
+}
+
+/// CreateLocation - Creates a debug info location.
+DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
+ DIScope S, DILocation OrigLoc) {
+ Value *Elts[] = {
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
+ S.getNode(),
+ OrigLoc.getNode(),
};
-
- Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
-
- M.addTypeName("llvm.dbg.block.type", Init->getType());
- GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
- GlobalValue::InternalLinkage,
- Init, "llvm.dbg.block", &M);
- GV->setSection("llvm.metadata");
- return DIBlock(GV);
+ return DILocation(MDNode::get(VMContext, &Elts[0], 4));
}
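
Locations compose through the fourth operand: an inlined location points back at the call site's location via OrigLoc, which the DILocation overload of ExtractDebugLocation below folds into the DebugLocTuple. A hedged usage sketch; DIF, FnScope and InlinedScope are hypothetical values, not names from this patch:

    DILocation CallSite = DIF.CreateLocation(10, 3, FnScope, DILocation(0));
    DILocation InBody   = DIF.CreateLocation(42, 7, InlinedScope, CallSite);
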
@@ -830,17 +884,17 @@ DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
/// inserting it at the end of the specified basic block.
void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
unsigned ColNo, BasicBlock *BB) {
-
+
// Lazily construct llvm.dbg.stoppoint function.
if (!StopPointFn)
- StopPointFn = llvm::Intrinsic::getDeclaration(&M,
+ StopPointFn = llvm::Intrinsic::getDeclaration(&M,
llvm::Intrinsic::dbg_stoppoint);
-
+
// Invoke llvm.dbg.stoppoint
Value *Args[] = {
- llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo),
- llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo),
- getCastToEmpty(CU)
+ ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo),
+ CU.getNode()
};
CallInst::Create(StopPointFn, Args, Args+3, "", BB);
}
@@ -851,9 +905,9 @@ void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
// Lazily construct llvm.dbg.func.start.
if (!FuncStartFn)
FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
-
+
// Call llvm.dbg.func.start which also implicitly sets a stoppoint.
- CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB);
+ CallInst::Create(FuncStartFn, SP.getNode(), "", BB);
}
/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
@@ -864,7 +918,7 @@ void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
// Call llvm.dbg.func.start.
- CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB);
+ CallInst::Create(RegionStartFn, D.getNode(), "", BB);
}
/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
@@ -875,19 +929,220 @@ void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
// Call llvm.dbg.region.end.
- CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB);
+ CallInst::Create(RegionEndFn, D.getNode(), "", BB);
}
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) {
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+ Instruction *InsertBefore) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
- Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB);
-
+ Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
+
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { Storage, getCastToEmpty(D) };
- CallInst::Create(DeclareFn, Args, Args+2, "", BB);
+ Value *Args[] = { Storage, D.getNode() };
+ CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+ BasicBlock *InsertAtEnd) {
+ // Cast the storage to a {}* for the call to llvm.dbg.declare.
+ Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
+
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { Storage, D.getNode() };
+ CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
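
A hedged usage sketch of the two overloads; Scope, Unit, IntTy, XSlot, Entry and FirstNonAlloca are hypothetical names, not part of this patch:

    // Describe local variable "x" held in an alloca, then attach the info.
    DIVariable XVar = DIF.CreateVariable(dwarf::DW_TAG_auto_variable, Scope,
                                         "x", Unit, /*LineNo=*/7, IntTy);
    DIF.InsertDeclare(XSlot, XVar, Entry);           // append to a BasicBlock
    DIF.InsertDeclare(XSlot, XVar, FirstNonAlloca);  // or before an Instruction
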
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ MetadataContext &TheMetadata = M.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+#endif
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+ ++BI) {
+ if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI))
+ processStopPoint(SPI);
+ else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI))
+ processFuncStart(FSI);
+ else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI))
+ processRegionStart(DRS);
+ else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI))
+ processRegionEnd(DRE);
+ else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ processDeclare(DDI);
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+ else if (MDDbgKind) {
+ if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation Loc(L);
+ DIScope S(Loc.getScope().getNode());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S.getNode()));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S.getNode()));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S.getNode()));
+ }
+ }
+#endif
+ }
+
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return;
+
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getElement(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
+ }
+}
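
A minimal usage sketch: one pass over the module populates the finder, collecting each descriptor exactly once thanks to the NodesSeen guard used by the add* helpers below.

    DebugInfoFinder Finder;
    Finder.processModule(M);
    // Finder now holds the reachable compile units, global variables,
    // subprograms and types for clients such as printers or verifiers.
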
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+
+ addCompileUnit(DT.getCompileUnit());
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT.getNode());
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ if (!DA.isNull())
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ DIType TypeE = DIType(D.getNode());
+ if (!TypeE.isNull())
+ processType(TypeE);
+ else
+ processSubprogram(DISubprogram(D.getNode()));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT.getNode());
+ if (!DDT.isNull())
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ if (LB.isNull())
+ return;
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context.getNode()));
+ else
+ return processSubprogram(DISubprogram(Context.getNode()));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (SP.isNull())
+ return;
+ if (!addSubprogram(SP))
+ return;
+ addCompileUnit(SP.getCompileUnit());
+ processType(SP.getType());
+}
+
+/// processStopPoint - Process DbgStopPointInst.
+void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) {
+ MDNode *Context = dyn_cast<MDNode>(SPI->getContext());
+ addCompileUnit(DICompileUnit(Context));
+}
+
+/// processFuncStart - Process DbgFuncStartInst.
+void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) {
+ MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processRegionStart - Process DbgRegionStartInst.
+void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) {
+ MDNode *SP = dyn_cast<MDNode>(DRS->getContext());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processRegionEnd - Process DbgRegionEndInst.
+void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) {
+ MDNode *SP = dyn_cast<MDNode>(DRE->getContext());
+ processSubprogram(DISubprogram(SP));
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+ DIVariable DV(cast<MDNode>(DDI->getVariable()));
+ if (DV.isNull())
+ return;
+
+ if (!NodesSeen.insert(DV.getNode()))
+ return;
+
+ addCompileUnit(DV.getCompileUnit());
+ processType(DV.getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (DT.isNull())
+ return false;
+
+ if (!NodesSeen.insert(DT.getNode()))
+ return false;
+
+ TYs.push_back(DT.getNode());
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (CU.isNull())
+ return false;
+
+ if (!NodesSeen.insert(CU.getNode()))
+ return false;
+
+ CUs.push_back(CU.getNode());
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (DIG.isNull())
+ return false;
+
+ if (!NodesSeen.insert(DIG.getNode()))
+ return false;
+
+ GVs.push_back(DIG.getNode());
+ return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (SP.isNull())
+ return false;
+
+ if (!NodesSeen.insert(SP.getNode()))
+ return false;
+
+ SPs.push_back(SP.getNode());
+ return true;
}
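
All four add* helpers share one guard: SmallPtrSet::insert returns false on a duplicate, so the visited test and the insertion are a single call. The common shape, as a sketch:

    static bool addUnique(MDNode *N, SmallVectorImpl<MDNode*> &List,
                          SmallPtrSet<MDNode*, 16> &Seen) {
      if (!N || !Seen.insert(N))
        return false;
      List.push_back(N);
      return true;
    }
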
namespace llvm {
@@ -939,30 +1194,17 @@ namespace llvm {
Value *findDbgGlobalDeclare(GlobalVariable *V) {
const Module *M = V->getParent();
- const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type");
- if (!Ty) return 0;
-
- Ty = PointerType::get(Ty, 0);
-
- Value *Val = V->stripPointerCasts();
- for (Value::use_iterator I = Val->use_begin(), E = Val->use_end();
- I != E; ++I) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- Value *VV = CE;
-
- while (VV->hasOneUse())
- VV = *VV->use_begin();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
- if (VV->getType() == Ty)
- return VV;
- }
- }
+ for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(cast_or_null<MDNode>(NMD->getElement(i)));
+ if (DIG.isNull())
+ continue;
+ if (DIG.getGlobal() == V)
+ return DIG.getNode();
}
-
- if (Val->getType() == Ty)
- return Val;
-
return 0;
}
@@ -990,8 +1232,8 @@ namespace llvm {
return 0;
}
- bool getLocationInfo(const Value *V, std::string &DisplayName,
- std::string &Type, unsigned &LineNo, std::string &File,
+bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo, std::string &File,
std::string &Dir) {
DICompileUnit Unit;
DIType TypeD;
@@ -999,81 +1241,56 @@ namespace llvm {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
Value *DIGV = findDbgGlobalDeclare(GV);
if (!DIGV) return false;
- DIGlobalVariable Var(cast<GlobalVariable>(DIGV));
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
- Var.getDisplayName(DisplayName);
+ if (const char *D = Var.getDisplayName())
+ DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
TypeD = Var.getType();
} else {
const DbgDeclareInst *DDI = findDbgDeclare(V);
if (!DDI) return false;
- DIVariable Var(cast<GlobalVariable>(DDI->getVariable()));
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
- Var.getName(DisplayName);
+ if (const char *D = Var.getName())
+ DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
TypeD = Var.getType();
}
- TypeD.getName(Type);
- Unit.getFilename(File);
- Unit.getDirectory(Dir);
+ if (const char *T = TypeD.getName())
+ Type = T;
+ if (const char *F = Unit.getFilename())
+ File = F;
+ if (const char *D = Unit.getDirectory())
+ Dir = D;
return true;
}
- /// CollectDebugInfoAnchors - Collect debugging information anchors.
- void CollectDebugInfoAnchors(Module &M,
- SmallVector<GlobalVariable *, 2> &CUs,
- SmallVector<GlobalVariable *, 4> &GVs,
- SmallVector<GlobalVariable *, 4> &SPs) {
-
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; GVI++) {
- GlobalVariable *GV = GVI;
- if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0
- && GV->isConstant() && GV->hasInitializer()) {
- DICompileUnit C(GV);
- if (C.isNull() == false) {
- CUs.push_back(GV);
- continue;
- }
- DIGlobalVariable G(GV);
- if (G.isNull() == false) {
- GVs.push_back(GV);
- continue;
- }
- DISubprogram S(GV);
- if (S.isNull() == false) {
- SPs.push_back(GV);
- continue;
- }
- }
- }
- }
-
- /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug
/// info intrinsic.
- bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
+ bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(FSI.getSubprogram(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLev);
}
- /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI,
CodeGenOpt::Level OptLev) {
@@ -1081,14 +1298,14 @@ namespace llvm {
}
- /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
+ /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug
/// info intrinsic.
bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI,
CodeGenOpt::Level OptLev) {
return DIDescriptor::ValidDebugInfo(DI.getVariable(), OptLev);
}
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.stoppoint intrinsic.
DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI,
DebugLocTracker &DebugLocInfo) {
@@ -1096,7 +1313,7 @@ namespace llvm {
Value *Context = SPI.getContext();
// If this location is already tracked then use it.
- DebugLocTuple Tuple(cast<GlobalVariable>(Context), SPI.getLine(),
+ DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(),
SPI.getColumn());
DenseMap<DebugLocTuple, unsigned>::iterator II
= DebugLocInfo.DebugIdMap.find(Tuple);
@@ -1107,23 +1324,48 @@ namespace llvm {
unsigned Id = DebugLocInfo.DebugLocations.size();
DebugLocInfo.DebugLocations.push_back(Tuple);
DebugLocInfo.DebugIdMap[Tuple] = Id;
-
+
+ return DebugLoc::get(Id);
+ }
+
+ /// ExtractDebugLocation - Extract debug location information
+ /// from DILocation.
+ DebugLoc ExtractDebugLocation(DILocation &Loc,
+ DebugLocTracker &DebugLocInfo) {
+ DebugLoc DL;
+ MDNode *Context = Loc.getScope().getNode();
+ MDNode *InlinedLoc = NULL;
+ if (!Loc.getOrigLocation().isNull())
+ InlinedLoc = Loc.getOrigLocation().getNode();
+ // If this location is already tracked then use it.
+ DebugLocTuple Tuple(Context, InlinedLoc, Loc.getLineNumber(),
+ Loc.getColumnNumber());
+ DenseMap<DebugLocTuple, unsigned>::iterator II
+ = DebugLocInfo.DebugIdMap.find(Tuple);
+ if (II != DebugLocInfo.DebugIdMap.end())
+ return DebugLoc::get(II->second);
+
+ // Add a new location entry.
+ unsigned Id = DebugLocInfo.DebugLocations.size();
+ DebugLocInfo.DebugLocations.push_back(Tuple);
+ DebugLocInfo.DebugIdMap[Tuple] = Id;
+
return DebugLoc::get(Id);
}
- /// ExtractDebugLocation - Extract debug location information
+ /// ExtractDebugLocation - Extract debug location information
/// from llvm.dbg.func_start intrinsic.
DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI,
DebugLocTracker &DebugLocInfo) {
DebugLoc DL;
Value *SP = FSI.getSubprogram();
- DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DISubprogram Subprogram(cast<MDNode>(SP));
unsigned Line = Subprogram.getLineNumber();
DICompileUnit CU(Subprogram.getCompileUnit());
// If this location is already tracked then use it.
- DebugLocTuple Tuple(CU.getGV(), Line, /* Column */ 0);
+ DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0);
DenseMap<DebugLocTuple, unsigned>::iterator II
= DebugLocInfo.DebugIdMap.find(Tuple);
if (II != DebugLocInfo.DebugIdMap.end())
@@ -1133,13 +1375,13 @@ namespace llvm {
unsigned Id = DebugLocInfo.DebugLocations.size();
DebugLocInfo.DebugLocations.push_back(Tuple);
DebugLocInfo.DebugIdMap[Tuple] = Id;
-
+
return DebugLoc::get(Id);
}
/// isInlinedFnStart - Return true if FSI is starting an inlined function.
bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<GlobalVariable>(FSI.getSubprogram()));
+ DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram()));
if (Subprogram.describes(CurrentFn))
return false;
@@ -1148,11 +1390,10 @@ namespace llvm {
/// isInlinedFnEnd - Return true if REI is ending an inlined function.
bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+ DISubprogram Subprogram(cast<MDNode>(REI.getContext()));
if (Subprogram.isNull() || Subprogram.describes(CurrentFn))
return false;
return true;
}
-
}
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 3fb6526..1c9159d 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -60,9 +60,11 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/System/Atomic.h"
@@ -84,7 +86,9 @@
#define FULL_UNIVERSAL 0
using namespace llvm;
+#ifndef NDEBUG
STATISTIC(NumIters , "Number of iterations to reach convergence");
+#endif
STATISTIC(NumConstraints, "Number of constraints");
STATISTIC(NumNodes , "Number of nodes");
STATISTIC(NumUnified , "Number of variables unified");
@@ -507,7 +511,7 @@ namespace {
#ifndef NDEBUG
V->dump();
#endif
- assert(0 && "Value does not have a node in the points-to graph!");
+ llvm_unreachable("Value does not have a node in the points-to graph!");
}
return I->second;
}
@@ -589,9 +593,12 @@ namespace {
friend class InstVisitor<Andersens>;
void visitReturnInst(ReturnInst &RI);
void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
- void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
+ void visitCallInst(CallInst &CI) {
+ if (isMalloc(&CI)) visitAllocationInst(CI);
+ else visitCallSite(CallSite(&CI));
+ }
void visitCallSite(CallSite CS);
- void visitAllocationInst(AllocationInst &AI);
+ void visitAllocationInst(Instruction &I);
void visitLoadInst(LoadInst &LI);
void visitStoreInst(StoreInst &SI);
void visitGetElementPtrInst(GetElementPtrInst &GEP);
@@ -606,7 +613,7 @@ namespace {
//===------------------------------------------------------------------===//
  // Implement Analyze interface
//
- void print(std::ostream &O, const Module* M) const {
+ void print(raw_ostream &O, const Module*) const {
PrintPointsToGraph();
}
};
@@ -614,7 +621,8 @@ namespace {
char Andersens::ID = 0;
static RegisterPass<Andersens>
-X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
+X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)",
+ false, true);
static RegisterAnalysisGroup<AliasAnalysis> Y(X);
// Initialize Timestamp Counter (static).
@@ -786,6 +794,8 @@ void Andersens::IdentifyObjects(Module &M) {
ValueNodes[&*II] = NumObjects++;
if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II))
ObjectNodes[AI] = NumObjects++;
+ else if (isMalloc(&*II))
+ ObjectNodes[&*II] = NumObjects++;
}
// Calls to inline asm need to be added as well because the callee isn't
@@ -825,11 +835,11 @@ unsigned Andersens::getNodeForConstantPointer(Constant *C) {
case Instruction::BitCast:
return getNodeForConstantPointer(CE->getOperand(0));
default:
- cerr << "Constant Expr not yet handled: " << *CE << "\n";
- assert(0);
+ errs() << "Constant Expr not yet handled: " << *CE << "\n";
+ llvm_unreachable(0);
}
} else {
- assert(0 && "Unknown constant pointer!");
+ llvm_unreachable("Unknown constant pointer!");
}
return 0;
}
@@ -852,11 +862,11 @@ unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
case Instruction::BitCast:
return getNodeForConstantPointerTarget(CE->getOperand(0));
default:
- cerr << "Constant Expr not yet handled: " << *CE << "\n";
- assert(0);
+ errs() << "Constant Expr not yet handled: " << *CE << "\n";
+ llvm_unreachable(0);
}
} else {
- assert(0 && "Unknown constant pointer!");
+ llvm_unreachable("Unknown constant pointer!");
}
return 0;
}
@@ -996,7 +1006,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
if (!isa<PointerType>(V->getType())) return true;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (dyn_cast<LoadInst>(*UI)) {
+ if (isa<LoadInst>(*UI)) {
return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
if (V == SI->getOperand(1)) {
@@ -1027,7 +1037,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) {
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
- } else if (dyn_cast<FreeInst>(*UI)) {
+ } else if (isa<FreeInst>(*UI)) {
return false;
} else {
return true;
@@ -1060,7 +1070,7 @@ void Andersens::CollectConstraints(Module &M) {
Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
ObjectIndex));
- if (I->hasInitializer()) {
+ if (I->hasDefinitiveInitializer()) {
AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
} else {
// If it doesn't have an initializer (i.e. it's defined in another
@@ -1152,15 +1162,15 @@ void Andersens::visitInstruction(Instruction &I) {
return;
default:
// Is this something we aren't handling yet?
- cerr << "Unknown instruction: " << I;
- abort();
+ errs() << "Unknown instruction: " << I;
+ llvm_unreachable(0);
}
}
-void Andersens::visitAllocationInst(AllocationInst &AI) {
- unsigned ObjectIndex = getObject(&AI);
- GraphNodes[ObjectIndex].setValue(&AI);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI),
+void Andersens::visitAllocationInst(Instruction &I) {
+ unsigned ObjectIndex = getObject(&I);
+ GraphNodes[ObjectIndex].setValue(&I);
+ Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I),
ObjectIndex));
}
@@ -1243,7 +1253,7 @@ void Andersens::visitSelectInst(SelectInst &SI) {
}
void Andersens::visitVAArg(VAArgInst &I) {
- assert(0 && "vaarg not handled yet!");
+ llvm_unreachable("vaarg not handled yet!");
}
/// AddConstraintsForCall - Add constraints for a call with actual arguments
@@ -1395,12 +1405,6 @@ bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
return Result;
}
-void dumpToDOUT(SparseBitVector<> *bitmap) {
-#ifndef NDEBUG
- dump(*bitmap, DOUT);
-#endif
-}
-
/// Clump together address taken variables so that the points-to sets use up
/// less space and can be operated on faster.
@@ -1424,7 +1428,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
// I believe this ends up being faster than making two vectors and splicing
@@ -1434,7 +1438,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
}
@@ -1443,7 +1447,7 @@ void Andersens::ClumpAddressTaken() {
unsigned Pos = NewPos++;
Translate[i] = Pos;
NewGraphNodes.push_back(GraphNodes[i]);
- DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n");
}
}
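
The mechanical rule behind these hunks, for reference: DOUT was a stream that compiled away in release builds, and its replacement wraps an explicit errs() call in the DEBUG macro, so the statement still vanishes unless assertions are enabled and -debug is passed.

    DOUT << "value = " << i << "\n";           // old: compile-time-disabled stream
    DEBUG(errs() << "value = " << i << "\n");  // new: explicit macro + errs()
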
@@ -1515,7 +1519,7 @@ void Andersens::ClumpAddressTaken() {
/// receive &D from E anyway.
void Andersens::HVN() {
- DOUT << "Beginning HVN\n";
+ DEBUG(errs() << "Beginning HVN\n");
// Build a predecessor graph. This is like our constraint graph with the
// edges going in the opposite direction, and there are edges for all the
// constraints, instead of just copy constraints. We also build implicit
@@ -1586,7 +1590,7 @@ void Andersens::HVN() {
Node2DFS.clear();
Node2Deleted.clear();
Node2Visited.clear();
- DOUT << "Finished HVN\n";
+ DEBUG(errs() << "Finished HVN\n");
}
@@ -1710,7 +1714,7 @@ void Andersens::HVNValNum(unsigned NodeIndex) {
/// and is equivalent to value numbering the collapsed constraint graph
/// including evaluating unions.
void Andersens::HU() {
- DOUT << "Beginning HU\n";
+ DEBUG(errs() << "Beginning HU\n");
// Build a predecessor graph. This is like our constraint graph with the
// edges going in the opposite direction, and there are edges for all the
// constraints, instead of just copy constraints. We also build implicit
@@ -1790,7 +1794,7 @@ void Andersens::HU() {
}
// PEClass nodes will be deleted by the deleting of N->PointsTo in our caller.
Set2PEClass.clear();
- DOUT << "Finished HU\n";
+ DEBUG(errs() << "Finished HU\n");
}
@@ -1968,12 +1972,12 @@ void Andersens::RewriteConstraints() {
// to anything.
if (LHSLabel == 0) {
DEBUG(PrintNode(&GraphNodes[LHSNode]));
- DOUT << " is a non-pointer, ignoring constraint.\n";
+ DEBUG(errs() << " is a non-pointer, ignoring constraint.\n");
continue;
}
if (RHSLabel == 0) {
DEBUG(PrintNode(&GraphNodes[RHSNode]));
- DOUT << " is a non-pointer, ignoring constraint.\n";
+ DEBUG(errs() << " is a non-pointer, ignoring constraint.\n");
continue;
}
// This constraint may be useless, and it may become useless as we translate
@@ -2021,19 +2025,19 @@ void Andersens::PrintLabels() const {
if (i < FirstRefNode) {
PrintNode(&GraphNodes[i]);
} else if (i < FirstAdrNode) {
- DOUT << "REF(";
+ DEBUG(errs() << "REF(");
PrintNode(&GraphNodes[i-FirstRefNode]);
- DOUT <<")";
+ DEBUG(errs() <<")");
} else {
- DOUT << "ADR(";
+ DEBUG(errs() << "ADR(");
PrintNode(&GraphNodes[i-FirstAdrNode]);
- DOUT <<")";
+ DEBUG(errs() <<")");
}
- DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel
+ DEBUG(errs() << " has pointer label " << GraphNodes[i].PointerEquivLabel
<< " and SCC rep " << VSSCCRep[i]
<< " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
- << "\n";
+ << "\n");
}
}
@@ -2047,7 +2051,7 @@ void Andersens::PrintLabels() const {
/// operation are stored in SDT and are later used in SolveConstraints()
/// and UniteNodes().
void Andersens::HCD() {
- DOUT << "Starting HCD.\n";
+ DEBUG(errs() << "Starting HCD.\n");
HCDSCCRep.resize(GraphNodes.size());
for (unsigned i = 0; i < GraphNodes.size(); ++i) {
@@ -2096,7 +2100,7 @@ void Andersens::HCD() {
Node2Visited.clear();
Node2Deleted.clear();
HCDSCCRep.clear();
- DOUT << "HCD complete.\n";
+ DEBUG(errs() << "HCD complete.\n");
}
// Component of HCD:
@@ -2168,7 +2172,7 @@ void Andersens::Search(unsigned Node) {
/// Optimize the constraints by performing offline variable substitution and
/// other optimizations.
void Andersens::OptimizeConstraints() {
- DOUT << "Beginning constraint optimization\n";
+ DEBUG(errs() << "Beginning constraint optimization\n");
SDTActive = false;
@@ -2252,7 +2256,7 @@ void Andersens::OptimizeConstraints() {
// HCD complete.
- DOUT << "Finished constraint optimization\n";
+ DEBUG(errs() << "Finished constraint optimization\n");
FirstRefNode = 0;
FirstAdrNode = 0;
}
@@ -2260,7 +2264,7 @@ void Andersens::OptimizeConstraints() {
/// Unite pointer but not location equivalent variables, now that the constraint
/// graph is built.
void Andersens::UnitePointerEquivalences() {
- DOUT << "Uniting remaining pointer equivalences\n";
+ DEBUG(errs() << "Uniting remaining pointer equivalences\n");
for (unsigned i = 0; i < GraphNodes.size(); ++i) {
if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
unsigned Label = GraphNodes[i].PointerEquivLabel;
@@ -2269,7 +2273,7 @@ void Andersens::UnitePointerEquivalences() {
UniteNodes(i, PENLEClass2Node[Label]);
}
}
- DOUT << "Finished remaining pointer equivalences\n";
+ DEBUG(errs() << "Finished remaining pointer equivalences\n");
PENLEClass2Node.clear();
}
@@ -2425,7 +2429,7 @@ void Andersens::SolveConstraints() {
std::vector<unsigned int> RSV;
#endif
while( !CurrWL->empty() ) {
- DOUT << "Starting iteration #" << ++NumIters << "\n";
+ DEBUG(errs() << "Starting iteration #" << ++NumIters << "\n");
Node* CurrNode;
unsigned CurrNodeIndex;
@@ -2728,11 +2732,11 @@ unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
SecondNode->OldPointsTo = NULL;
NumUnified++;
- DOUT << "Unified Node ";
+ DEBUG(errs() << "Unified Node ");
DEBUG(PrintNode(FirstNode));
- DOUT << " and Node ";
+ DEBUG(errs() << " and Node ");
DEBUG(PrintNode(SecondNode));
- DOUT << "\n";
+ DEBUG(errs() << "\n");
if (SDTActive)
if (SDT[Second] >= 0) {
@@ -2777,17 +2781,17 @@ unsigned Andersens::FindNode(unsigned NodeIndex) const {
void Andersens::PrintNode(const Node *N) const {
if (N == &GraphNodes[UniversalSet]) {
- cerr << "<universal>";
+ errs() << "<universal>";
return;
} else if (N == &GraphNodes[NullPtr]) {
- cerr << "<nullptr>";
+ errs() << "<nullptr>";
return;
} else if (N == &GraphNodes[NullObject]) {
- cerr << "<null>";
+ errs() << "<null>";
return;
}
if (!N->getValue()) {
- cerr << "artificial" << (intptr_t) N;
+ errs() << "artificial" << (intptr_t) N;
return;
}
@@ -2796,85 +2800,85 @@ void Andersens::PrintNode(const Node *N) const {
if (Function *F = dyn_cast<Function>(V)) {
if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
N == &GraphNodes[getReturnNode(F)]) {
- cerr << F->getName() << ":retval";
+ errs() << F->getName() << ":retval";
return;
} else if (F->getFunctionType()->isVarArg() &&
N == &GraphNodes[getVarargNode(F)]) {
- cerr << F->getName() << ":vararg";
+ errs() << F->getName() << ":vararg";
return;
}
}
if (Instruction *I = dyn_cast<Instruction>(V))
- cerr << I->getParent()->getParent()->getName() << ":";
+ errs() << I->getParent()->getParent()->getName() << ":";
else if (Argument *Arg = dyn_cast<Argument>(V))
- cerr << Arg->getParent()->getName() << ":";
+ errs() << Arg->getParent()->getName() << ":";
if (V->hasName())
- cerr << V->getName();
+ errs() << V->getName();
else
- cerr << "(unnamed)";
+ errs() << "(unnamed)";
- if (isa<GlobalValue>(V) || isa<AllocationInst>(V))
+ if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isMalloc(V))
if (N == &GraphNodes[getObject(V)])
- cerr << "<mem>";
+ errs() << "<mem>";
}
void Andersens::PrintConstraint(const Constraint &C) const {
if (C.Type == Constraint::Store) {
- cerr << "*";
+ errs() << "*";
if (C.Offset != 0)
- cerr << "(";
+ errs() << "(";
}
PrintNode(&GraphNodes[C.Dest]);
if (C.Type == Constraint::Store && C.Offset != 0)
- cerr << " + " << C.Offset << ")";
- cerr << " = ";
+ errs() << " + " << C.Offset << ")";
+ errs() << " = ";
if (C.Type == Constraint::Load) {
- cerr << "*";
+ errs() << "*";
if (C.Offset != 0)
- cerr << "(";
+ errs() << "(";
}
else if (C.Type == Constraint::AddressOf)
- cerr << "&";
+ errs() << "&";
PrintNode(&GraphNodes[C.Src]);
if (C.Offset != 0 && C.Type != Constraint::Store)
- cerr << " + " << C.Offset;
+ errs() << " + " << C.Offset;
if (C.Type == Constraint::Load && C.Offset != 0)
- cerr << ")";
- cerr << "\n";
+ errs() << ")";
+ errs() << "\n";
}
void Andersens::PrintConstraints() const {
- cerr << "Constraints:\n";
+ errs() << "Constraints:\n";
for (unsigned i = 0, e = Constraints.size(); i != e; ++i)
PrintConstraint(Constraints[i]);
}
void Andersens::PrintPointsToGraph() const {
- cerr << "Points-to graph:\n";
+ errs() << "Points-to graph:\n";
for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) {
const Node *N = &GraphNodes[i];
if (FindNode(i) != i) {
PrintNode(N);
- cerr << "\t--> same as ";
+ errs() << "\t--> same as ";
PrintNode(&GraphNodes[FindNode(i)]);
- cerr << "\n";
+ errs() << "\n";
} else {
- cerr << "[" << (N->PointsTo->count()) << "] ";
+ errs() << "[" << (N->PointsTo->count()) << "] ";
PrintNode(N);
- cerr << "\t--> ";
+ errs() << "\t--> ";
bool first = true;
for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
bi != N->PointsTo->end();
++bi) {
if (!first)
- cerr << ", ";
+ errs() << ", ";
PrintNode(&GraphNodes[*bi]);
first = false;
}
- cerr << "\n";
+ errs() << "\n";
}
}
}
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 6dabcdb..e2b288d 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -18,8 +18,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
-#include <ostream>
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
@@ -54,7 +53,7 @@ public:
CallsExternalNode = new CallGraphNode(0);
Root = 0;
- // Add every function to the call graph...
+ // Add every function to the call graph.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
addToCallGraph(I);
@@ -68,30 +67,21 @@ public:
AU.setPreservesAll();
}
- void print(std::ostream *o, const Module *M) const {
- if (o) print(*o, M);
- }
-
- virtual void print(std::ostream &o, const Module *M) const {
- o << "CallGraph Root is: ";
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
if (Function *F = getRoot()->getFunction())
- o << F->getName() << "\n";
- else
- o << "<<null function: 0x" << getRoot() << ">>\n";
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
- CallGraph::print(o, M);
+ CallGraph::print(OS, 0);
}
virtual void releaseMemory() {
destroy();
}
- /// dump - Print out this call graph.
- ///
- inline void dump() const {
- print(cerr, Mod);
- }
-
CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
@@ -179,21 +169,20 @@ void CallGraph::initialize(Module &M) {
}
void CallGraph::destroy() {
- if (!FunctionMap.empty()) {
- for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
- I != E; ++I)
- delete I->second;
- FunctionMap.clear();
- }
+ if (FunctionMap.empty()) return;
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
}
-void CallGraph::print(std::ostream &OS, const Module *M) const {
+void CallGraph::print(raw_ostream &OS, Module*) const {
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
-
void CallGraph::dump() const {
- print(cerr, 0);
+ print(errs(), 0);
}
//===----------------------------------------------------------------------===//
@@ -207,7 +196,7 @@ void CallGraph::dump() const {
// is to dropAllReferences before calling this.
//
Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
- assert(CGN->CalledFunctions.empty() && "Cannot remove function from call "
+ assert(CGN->empty() && "Cannot remove function from call "
"graph if it references other functions!");
Function *F = CGN->getFunction(); // Get the function for the call graph node
delete CGN; // Delete the call graph node for this func
@@ -217,20 +206,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
return F;
}
-// changeFunction - This method changes the function associated with this
-// CallGraphNode, for use by transformations that need to change the prototype
-// of a Function (thus they must create a new Function and move the old code
-// over).
-void CallGraph::changeFunction(Function *OldF, Function *NewF) {
- iterator I = FunctionMap.find(OldF);
- CallGraphNode *&New = FunctionMap[NewF];
- assert(I != FunctionMap.end() && I->second && !New &&
- "OldF didn't exist in CG or NewF already does!");
- New = I->second;
- New->F = NewF;
- FunctionMap.erase(I);
-}
-
// getOrInsertFunction - This method is identical to calling operator[], but
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
@@ -242,11 +217,13 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
return CGN = new CallGraphNode(const_cast<Function*>(F));
}
-void CallGraphNode::print(std::ostream &OS) const {
+void CallGraphNode::print(raw_ostream &OS) const {
if (Function *F = getFunction())
- OS << "Call graph node for function: '" << F->getName() <<"'\n";
+ OS << "Call graph node for function: '" << F->getName() << "'";
else
- OS << "Call graph node <<null function: 0x" << this << ">>:\n";
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n';
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (Function *FI = I->second->getFunction())
@@ -256,7 +233,7 @@ void CallGraphNode::print(std::ostream &OS) const {
OS << "\n";
}
-void CallGraphNode::dump() const { print(cerr); }
+void CallGraphNode::dump() const { print(errs()); }
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -264,8 +241,10 @@ void CallGraphNode::dump() const { print(cerr); }
void CallGraphNode::removeCallEdgeFor(CallSite CS) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS) {
- CalledFunctions.erase(I);
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
return;
}
}
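
The removal above trades order preservation for O(1) erasure: overwrite the doomed element with the vector's last entry and pop. The idiom in isolation, as a sketch:

    template <typename T>
    static void swapPopErase(std::vector<T> &V,
                             typename std::vector<T>::iterator I) {
      *I = V.back();  // order is not preserved
      V.pop_back();
    }
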
@@ -278,6 +257,7 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) {
void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
CalledFunctions[i] = CalledFunctions.back();
CalledFunctions.pop_back();
--i; --e;
@@ -290,21 +270,27 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
CallRecord &CR = *I;
- if (CR.second == Callee && !CR.first.getInstruction()) {
- CalledFunctions.erase(I);
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
return;
}
}
}
-/// replaceCallSite - Make the edge in the node for Old CallSite be for
-/// New CallSite instead. Note that this method takes linear time, so it
-/// should be used sparingly.
-void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) {
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+                                    CallSite NewCS, CallGraphNode *NewNode) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
- assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
- if (I->first == Old) {
- I->first = New;
+    assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
return;
}
}
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 3880d0a..a96a5c5 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -15,22 +15,25 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "cgscc-passmgr"
#include "llvm/CallGraphSCCPass.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/PassManagers.h"
#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// CGPassManager
//
-/// CGPassManager manages FPPassManagers and CalLGraphSCCPasses.
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
namespace {
class CGPassManager : public ModulePass, public PMDataManager {
-
public:
static char ID;
explicit CGPassManager(int Depth)
@@ -56,7 +59,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n";
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
P->dumpPassStructure(Offset + 1);
@@ -65,56 +68,275 @@ public:
}
Pass *getContainedPass(unsigned N) {
- assert ( N < PassVector.size() && "Pass number out of range!");
- Pass *FP = static_cast<Pass *>(PassVector[N]);
- return FP;
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
}
virtual PassManagerType getPassManagerType() const {
return PMT_CallGraphPassManager;
}
+
+private:
+ bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate);
+ void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG,
+                        bool CheckingMode);
};
-}
+} // end anonymous namespace.
char CGPassManager::ID = 0;
+
+bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate) {
+ bool Changed = false;
+ if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass*>(P)) {
+ if (!CallGraphUpToDate) {
+ RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ Timer *T = StartPassTimer(CGSP);
+ Changed = CGSP->runOnSCC(CurSCC);
+ StopPassTimer(CGSP, T);
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+ FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
+ assert(FPP && "Invalid CGPassManager member");
+
+ // Run pass P on all functions in the current SCC.
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
+ if (Function *F = CurSCC[i]->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ Timer *T = StartPassTimer(FPP);
+ Changed |= FPP->runOnFunction(*F);
+ StopPassTimer(FPP, T);
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(errs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(errs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
+ CurSCC[i]->dump();
+ );
+
+ bool MadeChange = false;
+
+ // Scan all functions in the SCC.
+ for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) {
+ CallGraphNode *CGN = CurSCC[sccidx];
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+          // pass RAUW'd a call with another value.  This can happen, for
+          // example, when well-known functions are constant folded.
+ CallSite::get(I->first).getInstruction() == 0) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // Just remove the edge from the set of callees, keep track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS = CallSite::get(I);
+ if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction())
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ else
+ CalleeNode = CG.getCallsExternalNode();
+
+ // Update the edge target in CGN.
+ for (CallGraphNode::iterator I = CGN->begin(); ; ++I) {
+ assert(I != CGN->end() && "Didn't find call entry");
+ if (I->first == CS.getInstruction()) {
+ I->second = CalleeNode;
+ break;
+ }
+ }
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it. We assume that
+ // newly introduced call sites won't be indirect. This could be fixed
+ // in the future.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction())
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ else
+ CalleeNode = CG.getCallsExternalNode();
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+    // After scanning this function, if we still have entries in CallSites,
+    // then they are dangling pointers.  WeakVH should save us from this, so
+    // abort if it happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large scc's.
+ if ((sccidx & 15) == 0)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ errs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i)
+ CurSCC[i]->dump();
+ } else {
+ errs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+}
+
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
bool Changed = doInitialization(CG);
- // Walk SCC
- for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG);
- I != E; ++I) {
-
- // Run all passes on current SCC
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- Pass *P = getContainedPass(Index);
-
- dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, "");
+ std::vector<CallGraphNode*> CurSCC;
+
+ // Walk the callgraph in bottom-up SCC order.
+ for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG);
+ CGI != E;) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ CurSCC = *CGI;
+ ++CGI;
+
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+ // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+#ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ CurSCC[i]->print(OS);
+ }
+ OS.flush();
+#endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
dumpRequiredSet(P);
initializeAnalysisImpl(P);
- StartPassTimer(P);
- if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P))
- Changed |= CGSP->runOnSCC(*I); // TODO : What if CG is changed ?
- else {
- FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
- assert (FPP && "Invalid CGPassManager member");
-
- // Run pass P on all functions current SCC
- std::vector<CallGraphNode*> &SCC = *I;
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- Function *F = SCC[i]->getFunction();
- if (F) {
- dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart());
- Changed |= FPP->runOnFunction(*F);
- }
- }
- }
- StopPassTimer(P);
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate);
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
@@ -125,6 +347,11 @@ bool CGPassManager::runOnModule(Module &M) {
recordAvailableAnalysis(P);
removeDeadPasses(P, "", ON_CG_MSG);
}
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ RefreshCallGraph(CurSCC, CG, false);
}
Changed |= doFinalization(CG);
return Changed;
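
The driver's key defensive move, in isolation: each SCC is copied out of the scc_iterator and the iterator advanced before any pass runs, so a pass that mutates the callgraph cannot invalidate the walk. A sketch:

    for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;) {
      std::vector<CallGraphNode*> SCC = *I;  // copy so passes may hack on it
      ++I;                                   // advance before running passes
      // ... run every scheduled pass over SCC ...
    }
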
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 920ee37..c4fb0b9 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -92,13 +92,12 @@ bool FindUsedTypes::runOnModule(Module &m) {
// passed in, then the types are printed symbolically if possible, using the
// symbol table from the module.
//
-void FindUsedTypes::print(std::ostream &OS, const Module *M) const {
- raw_os_ostream RO(OS);
- RO << "Types in use by this module:\n";
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
E = UsedTypes.end(); I != E; ++I) {
- RO << " ";
- WriteTypeSymbolic(RO, *I, M);
- RO << '\n';
+ OS << " ";
+ WriteTypeSymbolic(OS, *I, M);
+ OS << '\n';
}
}
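
This is the first of several print methods in this commit that move from std::ostream (wrapped per call in a raw_os_ostream adapter) to a native raw_ostream parameter. The shape of the conversion, as a minimal sketch with a hypothetical function:

    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Before: void print(std::ostream &OS) const {
    //           raw_os_ostream RO(OS);  // per-call adapter object
    //           RO << "header\n";
    //         }
    // After: the raw_ostream comes straight from the caller.
    static void printDemo(raw_ostream &OS) {
      OS << "header\n";
    }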
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 2e9884a..f5c1108 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -23,6 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InstIterator.h"
@@ -236,6 +237,9 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
}
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
} else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
@@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// Check the value being stored.
Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
- if (isa<MallocInst>(Ptr)) {
+ if (isa<MallocInst>(Ptr) || isMalloc(Ptr)) {
// Okay, easy case.
} else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
Function *F = CI->getCalledFunction();
@@ -435,7 +439,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (cast<StoreInst>(*II).isVolatile())
// Treat volatile stores as reading memory somewhere.
FunctionEffect |= Ref;
- } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II)) {
+ } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II) ||
+ isMalloc(&cast<Instruction>(*II))) {
FunctionEffect |= ModRef;
}
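
At this point in LLVM's history malloc is migrating from the first-class MallocInst to an ordinary call to @malloc, so analyses must recognize both forms during the transition. A sketch of the dual check used above (isHeapAllocDemo is a hypothetical helper):

    #include "llvm/Analysis/MallocHelper.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool isHeapAllocDemo(Value *V) {
      // Old IR form (MallocInst), or the new form: a call to malloc,
      // possibly seen through a bitcast (see MallocHelper.cpp below).
      return isa<MallocInst>(V) || isMalloc(V);
    }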
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index caeb14b..543e017 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -19,7 +19,6 @@
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,7 +38,7 @@ Pass *llvm::createIVUsersPass() {
/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
+static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
if (isa<SCEVConstant>(S))
return false;
@@ -54,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
- if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
return false;
}
return true;
@@ -80,10 +79,10 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
/// a mix of loop invariant and loop variant expressions. The start cannot,
/// however, contain an AddRec from a different loop, unless that loop is an
/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
- const SCEV* &Start, const SCEV* &Stride,
+static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
+ const SCEV *&Start, const SCEV *&Stride,
ScalarEvolution *SE, DominatorTree *DT) {
- const SCEV* TheAddRec = Start; // Initialize to zero.
+ const SCEV *TheAddRec = Start; // Initialize to zero.
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
@@ -109,9 +108,9 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
- const SCEV* AddRecStart = AddRec->getStart();
+ const SCEV *AddRecStart = AddRec->getStart();
AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
- const SCEV* AddRecStride = AddRec->getStepRecurrence(*SE);
+ const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
// FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
// than an outer loop of the current loop, reject it. LSR has no concept of
@@ -122,15 +121,15 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop,
Start = SE->getAddExpr(Start, AddRecStart);
- // If stride is an instruction, make sure it dominates the loop preheader.
+ // If stride is an instruction, make sure it properly dominates the header.
// Otherwise we could end up with a use before def situation.
if (!isa<SCEVConstant>(AddRecStride)) {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!AddRecStride->dominates(Preheader, DT))
+ BasicBlock *Header = L->getHeader();
+ if (!AddRecStride->properlyDominates(Header, DT))
return false;
- DOUT << "[" << L->getHeader()->getName()
- << "] Variable stride: " << *AddRec << "\n";
+ DEBUG(errs() << "[" << L->getHeader()->getName()
+ << "] Variable stride: " << *AddRec << "\n");
}
Stride = AddRecStride;
@@ -196,13 +195,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true; // Instruction already handled.
// Get the symbolic expression for this instruction.
- const SCEV* ISE = SE->getSCEV(I);
+ const SCEV *ISE = SE->getSCEV(I);
if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression.
Loop *UseLoop = LI->getLoopFor(I->getParent());
- const SCEV* Start = SE->getIntegerSCEV(0, ISE->getType());
- const SCEV* Stride = Start;
+ const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType());
+ const SCEV *Stride = Start;
if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
@@ -228,14 +227,14 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersIfInteresting(User)) {
- DOUT << "FOUND USER in other loop: " << *User
- << " OF SCEV: " << *ISE << "\n";
+ DEBUG(errs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
} else if (Processed.count(User) ||
!AddUsersIfInteresting(User)) {
- DOUT << "FOUND USER: " << *User
- << " OF SCEV: " << *ISE << "\n";
+ DEBUG(errs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
@@ -254,10 +253,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
- const SCEV* NewStart = SE->getMinusSCEV(Start, Stride);
+ const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
StrideUses->addUser(NewStart, User, I);
StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
- DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
+ DEBUG(errs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
} else {
StrideUses->addUser(Start, User, I);
}
@@ -295,9 +294,9 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
-const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const {
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Start with zero.
- const SCEV* RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
// Create the basic add recurrence.
RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
// Add the offset in a separate step, because it may be loop-variant.
@@ -308,7 +307,7 @@ const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const {
RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
// Evaluate the expression out of the loop, if possible.
if (!L->contains(U.getUser()->getParent())) {
- const SCEV* ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
+ const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
if (ExitVal->isLoopInvariant(L))
RetVal = ExitVal;
}
@@ -325,7 +324,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << ":\n";
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
- std::map<const SCEV*, IVUsersOfOneStride*>::const_iterator SI =
+ std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
@@ -340,15 +339,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << " (post-inc)";
OS << " in ";
UI->getUser()->print(OS);
+ OS << '\n';
}
}
}
-void IVUsers::print(std::ostream &o, const Module *M) const {
- raw_os_ostream OS(o);
- print(OS, M);
-}
-
void IVUsers::dump() const {
print(errs());
}
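
IVUsers also picks up the DOUT -> DEBUG(errs() << ...) conversion that runs through this whole commit. The replacement idiom, as a tiny standalone sketch (the "demo" DEBUG_TYPE is illustrative):

    #define DEBUG_TYPE "demo" // must be defined before Debug.h is included
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void traceDemo(unsigned N) {
      // Compiles away entirely in NDEBUG builds; otherwise prints to
      // standard error only when -debug (or -debug-only=demo) is given.
      DEBUG(errs() << "processing stride #" << N << '\n');
    }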
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
new file mode 100644
index 0000000..3b0d2c9
--- /dev/null
+++ b/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,338 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (isa<BranchInst>(*UI))
+ Reduction += 40; // Eliminating a conditional branch is a big win
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
+ // Eliminating a switch is a big win, proportional to the number of edges
+ // deleted.
+ Reduction += (SI->getNumSuccessors()-1) * 40;
+ else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += CI->getCalledValue() == V ? 500 : 0;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += II->getCalledValue() == V ? 500 : 0;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(**UI);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocationInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += 7;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+
+ return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForAlloca(Value *V) {
+ if (!isa<PointerType>(V->getType())) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += 10;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+      if (GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP)+15;
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+ ++NumBlocks;
+
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
+
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (Function *F = CS.getCalledFunction())
+ if (F->isDeclaration() &&
+ (F->getName() == "setjmp" || F->getName() == "_setjmp"))
+ NeverInline = true;
+
+ // Calls often compile into many machine instructions. Bump up their
+ // cost to reflect this.
+ if (!isa<IntrinsicInst>(II))
+ NumInsts += InlineConstants::CallPenalty;
+ }
+
+ // These, too, are calls.
+ if (isa<MallocInst>(II) || isa<FreeInst>(II))
+ NumInsts += InlineConstants::CallPenalty;
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+ ++NumVectorInsts;
+
+ // Noop casts, including ptr <-> int, don't count.
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ } else if (const GetElementPtrInst *GEPI =
+ dyn_cast<GetElementPtrInst>(II)) {
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ if (isa<ReturnInst>(II))
+ ++NumRets;
+
+ ++NumInsts;
+ }
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void CodeMetrics::analyzeFunction(Function *F) {
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB);
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ Metrics.analyzeFunction(F);
+
+ // A function with exactly one return has it removed during the inlining
+ // process (see InlineFunction), so don't count it.
+ // FIXME: This knowledge should really be encoded outside of FunctionInfo.
+ if (Metrics.NumRets==1)
+ --Metrics.NumInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
+ CountCodeReductionForAlloca(I)));
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function *, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee))
+ return llvm::InlineCost::getNever();
+
+  // InlineCost - This value measures how good an inline candidate this call
+  // site is. A lower inline cost makes it more likely for the call to be
+  // inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse())
+ InlineCost += InlineConstants::LastCallToStaticBonus;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ InlineCost += InlineConstants::ColdccPenalty;
+
+  // If the instruction after the call, or the normal destination of the
+  // invoke, is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ InlineCost += InlineConstants::NoreturnPenalty;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ InlineCost += InlineConstants::NoreturnPenalty;
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI.Metrics.NeverInline)
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI.Metrics.usesDynamicAlloca) {
+    // Get information about the caller...
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.Metrics.NumBlocks == 0)
+ CallerFI.analyzeFunction(Caller);
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.Metrics.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ unsigned ArgNo = 0;
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I, ++ArgNo) {
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. This favors functions that take many arguments over functions
+ // that take few arguments.
+ InlineCost -= 20;
+
+ // If this is a function being passed in, it is very likely that we will be
+ // able to turn an indirect function call into a direct function call.
+ if (isa<Function>(I))
+ InlineCost -= 100;
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimization possibilities (like scalar promotion, and
+ // scalarization), so encourage the inlining of the function.
+ //
+ else if (isa<AllocaInst>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ } else if (isa<Constant>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight;
+ }
+ }
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Don't inline into something too big, which would make it bigger.
+ // "size" here is the number of basic blocks, not instructions.
+ //
+ InlineCost += Caller->size()/15;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI.Metrics.NumInsts*5;
+
+ return llvm::InlineCost::get(InlineCost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.Metrics.NumBlocks == 1)
+ Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk (at least 10%
+  // of the instructions) of vector instructions.
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
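
To make the scoring concrete, a worked example with made-up inputs (the constants -20, /15 and *5 are the ones visible in getInlineCost above): a call site passing one alloca argument whose precomputed AllocaWeight is 40, in a caller with 30 basic blocks, calling a callee with 10 counted instructions:

    InlineCost = -20        (one argument passed)
               - 40         (AllocaWeight of that argument)
               + 30/15      (caller size penalty: +2)
               + 10*5       (callee size: +50)
               = -8         which is handed to InlineCost::get() and later
                            compared against the inliner's threshold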
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 2b34ad3..83724ca 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -16,8 +16,9 @@
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -46,8 +47,8 @@ namespace {
#include "llvm/Instruction.def"
void visitInstruction(Instruction &I) {
- cerr << "Instruction Count does not know about " << I;
- abort();
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
}
public:
static char ID; // Pass identification, replacement for typeid
@@ -58,7 +59,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
- virtual void print(std::ostream &O, const Module *M) const {}
+ virtual void print(raw_ostream &O, const Module *M) const {}
};
}
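
InstCount also swaps abort() for llvm_unreachable(), the entry point from the new ErrorHandling header. A minimal sketch of the idiom (classifyDemo is hypothetical):

    #include "llvm/Support/ErrorHandling.h"

    static int classifyDemo(int Kind) {
      switch (Kind) {
      case 0: return 1;
      case 1: return 2;
      }
      llvm_unreachable("unexpected kind"); // replaces the old abort() call
    }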
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 16b1947..ca9cdca 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/Interval.h"
#include "llvm/BasicBlock.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -29,29 +30,29 @@ bool Interval::isLoop() const {
// There is a loop in this interval iff one of the predecessors of the header
// node lives in the interval.
for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
- I != E; ++I) {
- if (contains(*I)) return true;
- }
+ I != E; ++I)
+ if (contains(*I))
+ return true;
return false;
}
-void Interval::print(std::ostream &o) const {
- o << "-------------------------------------------------------------\n"
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
<< "Interval Contents:\n";
// Print out all of the basic blocks in the interval...
for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
E = Nodes.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
- o << "Interval Predecessors:\n";
+ OS << "Interval Predecessors:\n";
for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
E = Predecessors.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
- o << "Interval Successors:\n";
+ OS << "Interval Successors:\n";
for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
E = Successors.end(); I != E; ++I)
- o << **I << "\n";
+ OS << **I << "\n";
}
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index cb8a85d..1f17b77 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -32,7 +32,7 @@ void IntervalPartition::releaseMemory() {
RootInterval = 0;
}
-void IntervalPartition::print(std::ostream &O, const Module*) const {
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
Intervals[i]->print(O);
}
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 971e6e7..7419659 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
// Register this pass...
@@ -37,7 +36,6 @@ LibCallAliasAnalysis::~LibCallAliasAnalysis() {
void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<TargetData>();
AU.setPreservesAll(); // Does not transform code
}
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 2985047..e0060c3 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -57,9 +57,6 @@ const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const {
}
// Look up this function in the string map.
- const char *ValueName = F->getNameStart();
- StringMap<const LibCallFunctionInfo*>::iterator I =
- Map->find(ValueName, ValueName+F->getNameLen());
- return I != Map->end() ? I->second : 0;
+ return Map->lookup(F->getName());
}
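
The replacement relies on StringMap::lookup, which folds the find()/end() test into a single call and returns a default-constructed value on a miss. A minimal sketch (the map contents are hypothetical):

    #include "llvm/ADT/StringMap.h"
    using namespace llvm;

    static const char *lookupDemo(const StringMap<const char*> &Map,
                                  StringRef Name) {
      return Map.lookup(Name); // null pointer if Name is absent
    }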
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index f605783..32d2266 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -15,18 +15,33 @@
//
// TODO: adapt as implementation progresses.
//
+// TODO: document lingo (pair, subscript, index)
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopDependenceAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
+STATISTIC(NumAnswered, "Number of dependence queries answered");
+STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent, "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown, "Number of pairs with unknown accesses");
+
LoopPass *llvm::createLoopDependenceAnalysisPass() {
return new LoopDependenceAnalysis();
}
@@ -44,14 +59,14 @@ static inline bool IsMemRefInstr(const Value *V) {
return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
}
-static void GetMemRefInstrs(
- const Loop *L, SmallVectorImpl<Instruction*> &memrefs) {
+static void GetMemRefInstrs(const Loop *L,
+ SmallVectorImpl<Instruction*> &Memrefs) {
for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
- b != be; ++b)
+ b != be; ++b)
for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
- i != ie; ++i)
+ i != ie; ++i)
if (IsMemRefInstr(i))
- memrefs.push_back(i);
+ Memrefs.push_back(i);
}
static bool IsLoadOrStoreInst(Value *I) {
@@ -63,53 +78,223 @@ static Value *GetPointerOperand(Value *I) {
return i->getPointerOperand();
if (StoreInst *i = dyn_cast<StoreInst>(I))
return i->getPointerOperand();
- assert(0 && "Value is no load or store instruction!");
+ llvm_unreachable("Value is no load or store instruction!");
// Never reached.
return 0;
}
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *aObj = A->getUnderlyingObject();
+ const Value *bObj = B->getUnderlyingObject();
+ return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+ bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+ return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
//===----------------------------------------------------------------------===//
// Dependence Testing
//===----------------------------------------------------------------------===//
-bool LoopDependenceAnalysis::isDependencePair(const Value *x,
- const Value *y) const {
- return IsMemRefInstr(x) &&
- IsMemRefInstr(y) &&
- (cast<const Instruction>(x)->mayWriteToMemory() ||
- cast<const Instruction>(y)->mayWriteToMemory());
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+ const Value *B) const {
+ return IsMemRefInstr(A) &&
+ IsMemRefInstr(B) &&
+ (cast<const Instruction>(A)->mayWriteToMemory() ||
+ cast<const Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+ Value *B,
+ DependencePair *&P) {
+ void *insertPos = 0;
+ FoldingSetNodeID id;
+ id.AddPointer(A);
+ id.AddPointer(B);
+
+ P = Pairs.FindNodeOrInsertPos(id, insertPos);
+ if (P) return true;
+
+ P = PairAllocator.Allocate<DependencePair>();
+ new (P) DependencePair(id, A, B);
+ Pairs.InsertNode(P, insertPos);
+ return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+ DenseSet<const Loop*>* Loops) const {
+ // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+ for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+ if (!S->isLoopInvariant(L))
+ Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+ DenseSet<const Loop*> loops;
+ getLoops(S, &loops);
+ return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+ const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+ return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+ return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+ DenseSet<const Loop*> loops;
+ getLoops(A, &loops);
+ getLoops(B, &loops);
+ return loops.size() == 1;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+ return A == B ? Dependent : Independent;
}
-bool LoopDependenceAnalysis::depends(Value *src, Value *dst) {
- assert(isDependencePair(src, dst) && "Values form no dependence pair!");
- DOUT << "== LDA test ==\n" << *src << *dst;
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
- // We only analyse loads and stores; for possible memory accesses by e.g.
- // free, call, or invoke instructions we conservatively assume dependence.
- if (!IsLoadOrStoreInst(src) || !IsLoadOrStoreInst(dst))
- return true;
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ DEBUG(errs() << " Testing subscript: " << *A << ", " << *B << "\n");
- Value *srcPtr = GetPointerOperand(src);
- Value *dstPtr = GetPointerOperand(dst);
- const Value *srcObj = srcPtr->getUnderlyingObject();
- const Value *dstObj = dstPtr->getUnderlyingObject();
- AliasAnalysis::AliasResult alias = AA->alias(
- srcObj, AA->getTargetData().getTypeStoreSize(srcObj->getType()),
- dstObj, AA->getTargetData().getTypeStoreSize(dstObj->getType()));
+ if (A == B) {
+ DEBUG(errs() << " -> [D] same SCEV\n");
+ return Dependent;
+ }
- // If we don't know whether or not the two objects alias, assume dependence.
- if (alias == AliasAnalysis::MayAlias)
- return true;
+ if (!isAffine(A) || !isAffine(B)) {
+ DEBUG(errs() << " -> [?] not affine\n");
+ return Unknown;
+ }
- // If the objects noalias, they are distinct, accesses are independent.
- if (alias == AliasAnalysis::NoAlias)
- return false;
+ if (isZIVPair(A, B))
+ return analyseZIV(A, B, S);
- // TODO: the underlying objects MustAlias, test for dependence
+ if (isSIVPair(A, B))
+ return analyseSIV(A, B, S);
- // We couldn't establish a more precise result, so we have to conservatively
- // assume full dependence.
- return true;
+ return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+ DEBUG(errs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+  // We only analyse loads and stores; possible memory accesses by e.g.
+  // free, call, or invoke instructions are not analysed.
+ if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+ DEBUG(errs() << "--> [?] no load/store\n");
+ return Unknown;
+ }
+
+ Value *aPtr = GetPointerOperand(P->A);
+ Value *bPtr = GetPointerOperand(P->B);
+
+ switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+ case AliasAnalysis::MayAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+ DEBUG(errs() << "---> [?] may alias\n");
+ return Unknown;
+
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(errs() << "---> [I] no alias\n");
+ return Independent;
+
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias, test accesses for dependence.
+ }
+
+ const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+ const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+ if (!aGEP || !bGEP)
+ return Unknown;
+
+ // FIXME: Is filtering coupled subscripts necessary?
+
+ // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+ // trailing zeroes to the smaller GEP, if needed.
+ typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+ GEPOpdPairsTy opds;
+ for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+ aEnd = aGEP->idx_end(),
+ bIdx = bGEP->idx_begin(),
+ bEnd = bGEP->idx_end();
+ aIdx != aEnd && bIdx != bEnd;
+ aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+ const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+ const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+ opds.push_back(std::make_pair(aSCEV, bSCEV));
+ }
+
+ if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets: the first
+    // subscripts (the pointer offsets) of both GEPs must be equal.
+    //
+ // TODO: this could be relaxed by adding the size of the underlying object
+ // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+ // know that x is a [100 x i8]*, we could modify the first subscript to be
+ // (i, 200-i) instead of (i, -i).
+ return Unknown;
+ }
+
+ // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+ for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+ i != end; ++i) {
+ Subscript subscript;
+ DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+ if (result != Dependent) {
+ // We either proved independence or failed to analyse this subscript.
+ // Further subscripts will not improve the situation, so abort early.
+ return result;
+ }
+ P->Subscripts.push_back(subscript);
+ }
+ // We successfully analysed all subscripts but failed to prove independence.
+ return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+ assert(isDependencePair(A, B) && "Values form no dependence pair!");
+ ++NumAnswered;
+
+ DependencePair *p;
+ if (!findOrInsertDependencePair(A, B, p)) {
+ // The pair is not cached, so analyse it.
+ ++NumAnalysed;
+ switch (p->Result = analysePair(p)) {
+ case Dependent: ++NumDependent; break;
+ case Independent: ++NumIndependent; break;
+ case Unknown: ++NumUnknown; break;
+ }
+ }
+ return p->Result != Independent;
}
//===----------------------------------------------------------------------===//
@@ -123,14 +308,19 @@ bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
+void LoopDependenceAnalysis::releaseMemory() {
+ Pairs.clear();
+ PairAllocator.Reset();
+}
+
void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
AU.addRequiredTransitive<ScalarEvolution>();
}
-static void PrintLoopInfo(
- raw_ostream &OS, LoopDependenceAnalysis *LDA, const Loop *L) {
+static void PrintLoopInfo(raw_ostream &OS,
+ LoopDependenceAnalysis *LDA, const Loop *L) {
if (!L->empty()) return; // ignore non-innermost loops
SmallVector<Instruction*, 8> memrefs;
@@ -142,14 +332,14 @@ static void PrintLoopInfo(
OS << " Load/store instructions: " << memrefs.size() << "\n";
for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
- end = memrefs.end(); x != end; ++x)
- OS << "\t" << (x - memrefs.begin()) << ": " << **x;
+ end = memrefs.end(); x != end; ++x)
+ OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
OS << " Pairwise dependence results:\n";
for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
- end = memrefs.end(); x != end; ++x)
+ end = memrefs.end(); x != end; ++x)
for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
- y != end; ++y)
+ y != end; ++y)
if (LDA->isDependencePair(*x, *y))
OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
<< ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
@@ -160,8 +350,3 @@ void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
// TODO: doc why const_cast is safe
PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
}
-
-void LoopDependenceAnalysis::print(std::ostream &OS, const Module *M) const {
- raw_os_ostream os(OS);
- print(os, M);
-}
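
For orientation, the standard dependence-testing terminology behind the new isZIVPair/isSIVPair predicates (the subscripts are illustrative; i and j are induction variables of different loops):

    A[5]   vs. A[7]    no induction variable occurs  -> ZIV (zero index vars)
    A[i+1] vs. A[i+3]  exactly one loop's IV occurs  -> SIV (single index var)
    A[i+j] vs. A[j]    IVs of two loops occur        -> MIV (multiple index vars)

getLoops collects the set of loops each subscript SCEV varies in, and the size of the combined set drives this classification.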
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index bb53589..ce2d29f 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -20,12 +20,22 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+bool VerifyLoopInfo = true;
+#else
+bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
char LoopInfo::ID = 0;
static RegisterPass<LoopInfo>
X("loops", "Natural Loop Information", true, true);
@@ -34,6 +44,338 @@ X("loops", "Natural Loop Information", true, true);
// Loop implementation
//
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return isLoopInvariant(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// isLoopInvariant - Return true if the specified instruction is
+/// loop-invariant.
+///
+bool Loop::isLoopInvariant(Instruction *I) const {
+ return !contains(I->getParent());
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits;
+ InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H);
+ assert(PI != InvBlockTraits::child_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
+
+/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
+/// the canonical induction variable value for the "next" iteration of the
+/// loop. This always succeeds if getCanonicalInductionVariable succeeds.
+///
+Instruction *Loop::getCanonicalInductionVariableIncrement() const {
+ if (PHINode *PN = getCanonicalInductionVariable()) {
+ bool P1InLoop = contains(PN->getIncomingBlock(1));
+ return cast<Instruction>(PN->getIncomingValue(P1InLoop));
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+ // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+ // canonical induction variable and V is the trip count of the loop.
+ Instruction *Inc = getCanonicalInductionVariableIncrement();
+ if (Inc == 0) return 0;
+ PHINode *IV = cast<PHINode>(Inc->getOperand(0));
+
+ BasicBlock *BackedgeBlock =
+ IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getOperand(0) == Inc) {
+ if (BI->getSuccessor(0) == getHeader()) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ return ICI->getOperand(1);
+ } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ return ICI->getOperand(1);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the trip count is very large
+/// (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+ Value* TripCount = this->getTripCount();
+ if (TripCount) {
+ if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+ // Guard against huge trip counts.
+ if (TripCountC->getValue().getActiveBits() <= 32) {
+ return (unsigned)TripCountC->getZExtValue();
+ }
+ }
+ }
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+ Value* TripCount = this->getTripCount();
+ // This will hold the ConstantInt result, if any
+ ConstantInt *Result = NULL;
+ if (TripCount) {
+ // See if the trip count is constant itself
+ Result = dyn_cast<ConstantInt>(TripCount);
+ // if not, see if it is a multiplication
+ if (!Result)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+ switch (BO->getOpcode()) {
+ case BinaryOperator::Mul:
+ Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Guard against huge trip counts.
+ if (Result && Result->getValue().getActiveBits() <= 32) {
+ return (unsigned)Result->getZExtValue();
+ } else {
+ return 1;
+ }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm() const {
+  // Collect the loop's blocks in a set for quick membership tests.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+ UserBB = P->getIncomingBlock(UI);
+ }
+
+ // Check the current block, as a fast-path. Most values are used in
+ // the same block they are defined in.
+ if (UserBB != BB && !LoopBBs.count(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader.
+ if (!getLoopPreheader())
+ return false;
+ // Normal-form loops have a single backedge.
+ if (!getLoopLatch())
+ return false;
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!contains(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop is in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(isLoopSimplifyForm() &&
+ "getUniqueExitBlocks assumes the loop is in canonical form!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ typedef GraphTraits<BasicBlock *> BlockTraits;
+ typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits;
+ for (BlockTraits::ChildIteratorType I =
+ BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+ I != E; ++I) {
+      // If the block is inside the loop then it is not an exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+      // Only insert this exit block into the output ExitBlocks vector if the
+      // current basic block is the exit block's first predecessor. This
+      // ensures that the same exit block is not inserted twice into the
+      // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+      // If a terminator has more than two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(BlockTraits::child_begin(current),
+ BlockTraits::child_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -43,7 +385,29 @@ bool LoopInfo::runOnFunction(Function &) {
return false;
}
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest();
+ }
+
+ // TODO: check BBMap consistency.
+}
+
void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTree>();
}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
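
A C-level picture (illustrative only) of the loop shape that getCanonicalInductionVariable and getTripCount recognize once IndVarSimplify has run: an integer IV starting at 0, stepping by 1, with the backedge guarded by IV != N.

    unsigned sumDemo(unsigned N) {
      unsigned Sum = 0;
      for (unsigned IV = 0; IV != N; ++IV) // PHI {0,+,1}; 'icmp ne' backedge
        Sum += IV;
      return Sum; // getTripCount() would return the Value holding N
    }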
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index ee03556..43463cd 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
//
char LPPassManager::ID = 0;
-/// LPPassManager manages FPPassManagers and CalLGraphSCCPasses.
LPPassManager::LPPassManager(int Depth)
: FunctionPass(&ID), PMDataManager(Depth) {
@@ -111,17 +110,21 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
else
LI->addTopLevelLoop(L);
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
// Insert L into loop queue
if (L == CurrentLoop)
redoLoop(L);
- else if (!ParentLoop)
+ else if (!L->getParentLoop())
// This is top level loop.
LQ.push_front(L);
else {
- // Insert L after ParentLoop
+ // Insert L after the parent loop.
for (std::deque<Loop *>::iterator I = LQ.begin(),
E = LQ.end(); I != E; ++I) {
- if (*I == ParentLoop) {
+ if (*I == L->getParentLoop()) {
// deque does not support insert after.
++I;
LQ.insert(I, 1, L);
@@ -217,41 +220,66 @@ bool LPPassManager::runOnFunction(Function &F) {
skipThisLoop = false;
redoThisLoop = false;
- // Run all passes on current SCC
+ // Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
- dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, "");
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getNameStr());
dumpRequiredSet(P);
initializeAnalysisImpl(P);
LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ assert(LP && "Invalid LPPassManager member");
{
PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader());
- StartPassTimer(P);
- assert(LP && "Invalid LPPassManager member");
+ Timer *T = StartPassTimer(P);
Changed |= LP->runOnLoop(CurrentLoop, *this);
- StopPassTimer(P);
+ StopPassTimer(P, T);
}
if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, "");
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getNameStr());
dumpPreservedSet(P);
- verifyPreservedAnalysis(LP);
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ Timer *T = StartPassTimer(LI);
+ CurrentLoop->verifyLoop();
+ StopPassTimer(LI, T);
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(LP);
+ }
+
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P, "", ON_LOOP_MSG);
-
- // If dominator information is available then verify the info if requested.
- verifyDomInfo(*LP, F);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getNameStr(),
+ ON_LOOP_MSG);
if (skipThisLoop)
// Do not run other passes on this loop.
break;
}
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
// Pop the loop from queue after running all passes.
LQ.pop_back();
@@ -272,7 +300,7 @@ bool LPPassManager::runOnFunction(Function &F) {
/// Print passes managed by this manager
void LPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n";
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
P->dumpPassStructure(Offset + 1);
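Because std::deque::insert places the new element before the iterator, insertLoopIntoQueue above advances the iterator one step past the parent loop before inserting. A minimal sketch of the same insert-after idiom on a plain deque (insertAfter is an illustrative helper, not part of the patch):

#include <algorithm>
#include <deque>

// Insert NewVal immediately after the first occurrence of After; if After
// is absent, fall back to pushing at the front, mirroring the top-level
// loop case above.
static void insertAfter(std::deque<int> &DQ, int After, int NewVal) {
  std::deque<int>::iterator I = std::find(DQ.begin(), DQ.end(), After);
  if (I == DQ.end()) {
    DQ.push_front(NewVal);
    return;
  }
  ++I;                     // deque does not support insert-after directly...
  DQ.insert(I, 1, NewVal); // ...so step past the element and insert before I
}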
diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp
new file mode 100644
index 0000000..89051d1
--- /dev/null
+++ b/lib/Analysis/MallocHelper.cpp
@@ -0,0 +1,230 @@
+//===-- MallocHelper.cpp - Functions to identify malloc calls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to malloc, bitcasts of malloc
+// calls, and the types and array sizes associated with them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value* I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ const Module* M = CI->getParent()->getParent()->getParent();
+ Function *MallocFunc = M->getFunction("malloc");
+
+ if (CI->getOperand(0) != MallocFunc)
+ return false;
+
+ // Check malloc prototype.
+  // FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
+  // attribute exists.
+ const FunctionType *FTy = MallocFunc->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+ if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* llvm::extractMallocCall(const Value* I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst* llvm::extractMallocCall(Value* I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst* BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst* llvm::extractMallocCallFromBitCast(Value* I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
+ const TargetData* TD) {
+ if (!CI)
+ return false;
+
+ const Type* T = getMallocAllocatedType(CI);
+
+  // We can only identify an array malloc if we know the type of the malloc
+ // call.
+ if (!T) return false;
+
+ Value* MallocArg = CI->getOperand(1);
+ Constant *ElementSize = ConstantExpr::getSizeOf(T);
+ ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+ MallocArg->getType());
+ Constant *FoldedElementSize = ConstantFoldConstantExpression(
+ cast<ConstantExpr>(ElementSize),
+ Context, TD);
+
+ if (isa<ConstantExpr>(MallocArg))
+ return (MallocArg != ElementSize);
+
+ BinaryOperator *BI = dyn_cast<BinaryOperator>(MallocArg);
+ if (!BI)
+ return false;
+
+ if (BI->getOpcode() == Instruction::Mul)
+ // ArraySize * ElementSize
+ if (BI->getOperand(1) == ElementSize ||
+ (FoldedElementSize && BI->getOperand(1) == FoldedElementSize))
+ return true;
+
+ // TODO: Detect case where MallocArg mul has been transformed to shl.
+
+ return false;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// matches the malloc call IR generated by CallInst::CreateMalloc(). This
+/// means that it is a malloc call with one bitcast use AND the malloc call's
+/// size argument is:
+/// 1. a constant not equal to the malloc's allocated type
+/// or
+/// 2. the result of a multiplication by the malloc's allocated type
+/// Otherwise it returns NULL.
+/// The unique bitcast is needed to determine the type/size of the array
+/// allocation.
+CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context,
+ const TargetData* TD) {
+ CallInst *CI = extractMallocCall(I);
+ return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
+}
+
+const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context,
+ const TargetData* TD) {
+ const CallInst *CI = extractMallocCall(I);
+ return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* llvm::getMallocType(const CallInst* CI) {
+ assert(isMalloc(CI) && "GetMallocType and not malloc call");
+
+ const BitCastInst* BCI = NULL;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+ break;
+
+ // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
+ // destination type.
+ if (BCI && CI->hasOneUse())
+ return cast<PointerType>(BCI->getDestTy());
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (!BCI)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type* llvm::getMallocAllocatedType(const CallInst* CI) {
+ const PointerType* PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// isConstantOne - Return true only if val is constant int 1.
+static bool isConstantOne(Value *val) {
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. The array
+/// size is computed in one of three ways:
+/// 1. If the element type is of size 1, then the array size is the argument to
+/// malloc.
+/// 2. Else if the malloc's argument is a constant, the array size is that
+/// argument divided by the element type's size.
+/// 3. Else the malloc argument must be a multiplication and the array size is
+/// the first operand of the multiplication.
+/// This function returns constant 1 if:
+/// 1. The malloc call's allocated type cannot be determined.
+/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL
+/// ArraySize.
+Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context,
+ const TargetData* TD) {
+ // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
+ if (!isArrayMalloc(CI, Context, TD))
+ return ConstantInt::get(CI->getOperand(1)->getType(), 1);
+
+ Value* MallocArg = CI->getOperand(1);
+ assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type");
+ Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI));
+ ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+ MallocArg->getType());
+
+ Constant* CO = dyn_cast<Constant>(MallocArg);
+ BinaryOperator* BO = dyn_cast<BinaryOperator>(MallocArg);
+ assert((isConstantOne(ElementSize) || CO || BO) &&
+ "getMallocArraySize and malformed malloc IR");
+
+ if (isConstantOne(ElementSize))
+ return MallocArg;
+
+ if (CO)
+ return CO->getOperand(0);
+
+ // TODO: Detect case where MallocArg mul has been transformed to shl.
+
+ assert(BO && "getMallocArraySize not constant but not multiplication either");
+ return BO->getOperand(0);
+}
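A hedged sketch of how a client might combine these helpers; describeAllocation is an illustrative function, not part of the patch, and TD may be null when TargetData is unavailable, which the helpers tolerate:

#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// Print what the MallocHelper API can recover about a value, if anything.
static void describeAllocation(Value *V, LLVMContext &Context,
                               const TargetData *TD, raw_ostream &OS) {
  CallInst *CI = extractMallocCall(V);
  if (!CI)
    CI = extractMallocCallFromBitCast(V);
  if (!CI)
    return; // not a recognized malloc
  if (const Type *Ty = getMallocAllocatedType(CI)) {
    OS << "malloc of " << *Ty;
    if (isArrayMalloc(CI, Context, TD))
      OS << ", array size: " << *getMallocArraySize(CI, Context, TD);
    OS << "\n";
  }
}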
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3b21029..d640075 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -16,16 +16,15 @@
#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -71,12 +70,10 @@ void MemoryDependenceAnalysis::releaseMemory() {
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
- AU.addRequiredTransitive<TargetData>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
- TD = &getAnalysis<TargetData>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
@@ -112,10 +109,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
uint64_t PointerSize = 0;
if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
Pointer = S->getPointerOperand();
- PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType());
+ PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
} else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Pointer = V->getOperand(0);
- PointerSize = TD->getTypeStoreSize(V->getType());
+ PointerSize = AA->getTypeStoreSize(V->getType());
} else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
Pointer = F->getPointerOperand();
@@ -185,7 +182,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
Value *Pointer = LI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(LI->getType());
+ uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
@@ -211,7 +208,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
Value *Pointer = SI->getPointerOperand();
- uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R =
@@ -228,15 +225,19 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// the allocation, return Def. This means that there is no dependence and
// the access can be optimized based on that. For example, a load could
// turn into undef.
- if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) {
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocationInst>(Inst) || extractMallocCall(Inst)) {
Value *AccessPtr = MemPtr->getUnderlyingObject();
- if (AccessPtr == AI ||
- AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
- return MemDepResult::getDef(AI);
+ if (AccessPtr == Inst ||
+ AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
continue;
}
-
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
case AliasAnalysis::NoModRef:
@@ -302,7 +303,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = SI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
}
} else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
// If this is a volatile load, don't mess around with it. Just return the
@@ -311,7 +312,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
else {
MemPtr = LI->getPointerOperand();
- MemSize = TD->getTypeStoreSize(LI->getType());
+ MemSize = AA->getTypeStoreSize(LI->getType());
}
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS = CallSite::get(QueryInst);
@@ -513,7 +514,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
// We know that the pointer value is live into FromBB find the def/clobbers
// from presecessors.
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
- uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
+ uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
@@ -599,6 +600,42 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
return Dep;
}
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+  // One new entry, just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
@@ -731,10 +768,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// If we do need to do phi translation, then there are a bunch of different
// cases, because we have to find a Value* live in the predecessor block. We
// know that PtrInst is defined in this block at least.
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
// If this is directly a PHI node, just use the incoming values for each
// pred as the phi translated version.
if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
+ Cache = 0;
+
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
@@ -759,15 +808,6 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
goto PredTranslationFailure;
}
- // We may have added values to the cache list before this PHI
- // translation. If so, we haven't done anything to ensure that the
- // cache remains sorted. Sort it now (if needed) so that recursive
- // invocations of getNonLocalPointerDepFromBB that could reuse the cache
- // value will only see properly sorted cache arrays.
- if (Cache && NumSortedEntries != Cache->size())
- std::sort(Cache->begin(), Cache->end());
- Cache = 0;
-
// FIXME: it is entirely possible that PHI translating will end up with
// the same value. Consider PHI translating something like:
// X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
@@ -779,7 +819,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
Result, Visited))
goto PredTranslationFailure;
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->second;
@@ -806,11 +846,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->second;
NumSortedEntries = Cache->size();
- } else if (NumSortedEntries != Cache->size()) {
- std::sort(Cache->begin(), Cache->end());
- NumSortedEntries = Cache->size();
}
-
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
@@ -841,33 +878,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
}
// Okay, we're done now. If we added new values to the cache, re-sort it.
- switch (Cache->size()-NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end()-1, Val);
- Cache->insert(Entry, Val);
- // FALL THROUGH.
- }
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache->size() != 1) {
- NonLocalDepEntry Val = Cache->back();
- Cache->pop_back();
- NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache->begin(), Cache->end(), Val);
- Cache->insert(Entry, Val);
- }
- break;
- default:
- // Added many values, do a full scale sort.
- std::sort(Cache->begin(), Cache->end());
- }
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
DEBUG(AssertSorted(*Cache));
return false;
}
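The helper factored out above avoids a full std::sort when only one or two entries were appended to an already-sorted cache: a binary search finds the insertion point and only the tail is moved. The same trick on a plain vector (illustrative types, not the MemDep cache):

#include <algorithm>
#include <vector>

// Restore sorted order when exactly the last element may be out of place;
// O(log n) to find the spot plus one block move, versus an O(n log n) resort.
static void insertLastIntoSortedPrefix(std::vector<int> &V) {
  if (V.size() < 2)
    return;
  int Val = V.back();
  V.pop_back();
  std::vector<int>::iterator I = std::upper_bound(V.begin(), V.end(), Val);
  V.insert(I, Val);
}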
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
new file mode 100644
index 0000000..43f4af3
--- /dev/null
+++ b/lib/Analysis/PointerTracking.cpp
@@ -0,0 +1,265 @@
+//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking of pointer bounds.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Analysis/PointerTracking.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Value.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+char PointerTracking::ID = 0;
+PointerTracking::PointerTracking() : FunctionPass(&ID) {}
+
+bool PointerTracking::runOnFunction(Function &F) {
+ predCache.clear();
+ assert(analyzing.empty());
+ FF = &F;
+ TD = getAnalysisIfAvailable<TargetData>();
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool PointerTracking::doInitialization(Module &M) {
+ const Type *PTy = Type::getInt8PtrTy(M.getContext());
+
+ // Find calloc(i64, i64) or calloc(i32, i32).
+ callocFunc = M.getFunction("calloc");
+ if (callocFunc) {
+ const FunctionType *Ty = callocFunc->getFunctionType();
+
+ std::vector<const Type*> args, args2;
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+ const FunctionType *Calloc1Type =
+ FunctionType::get(PTy, args, false);
+ const FunctionType *Calloc2Type =
+ FunctionType::get(PTy, args2, false);
+ if (Ty != Calloc1Type && Ty != Calloc2Type)
+ callocFunc = 0; // Give up
+ }
+
+ // Find realloc(i8*, i64) or realloc(i8*, i32).
+ reallocFunc = M.getFunction("realloc");
+ if (reallocFunc) {
+ const FunctionType *Ty = reallocFunc->getFunctionType();
+ std::vector<const Type*> args, args2;
+ args.push_back(PTy);
+ args.push_back(Type::getInt64Ty(M.getContext()));
+ args2.push_back(PTy);
+ args2.push_back(Type::getInt32Ty(M.getContext()));
+
+ const FunctionType *Realloc1Type =
+ FunctionType::get(PTy, args, false);
+ const FunctionType *Realloc2Type =
+ FunctionType::get(PTy, args2, false);
+ if (Ty != Realloc1Type && Ty != Realloc2Type)
+ reallocFunc = 0; // Give up
+ }
+ return false;
+}
+
+// Calculates the number of elements allocated for pointer P;
+// the type of the element is stored in Ty.
+const SCEV *PointerTracking::computeAllocationCount(Value *P,
+ const Type *&Ty) const {
+ Value *V = P->stripPointerCasts();
+ if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ Value *arraySize = AI->getArraySize();
+ Ty = AI->getAllocatedType();
+ // arraySize elements of type Ty.
+ return SE->getSCEV(arraySize);
+ }
+
+ if (CallInst *CI = extractMallocCall(V)) {
+ Value *arraySize = getMallocArraySize(CI, P->getContext(), TD);
+ Ty = getMallocAllocatedType(CI);
+ if (!Ty || !arraySize) return SE->getCouldNotCompute();
+ // arraySize elements of type Ty.
+ return SE->getSCEV(arraySize);
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ Ty = ATy->getElementType();
+ return SE->getConstant(Type::getInt32Ty(P->getContext()),
+ ATy->getNumElements());
+ }
+ }
+ Ty = GV->getType();
+ return SE->getConstant(Type::getInt32Ty(P->getContext()), 1);
+ //TODO: implement more tracking for globals
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ CallSite CS(CI);
+ Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ const Loop *L = LI->getLoopFor(CI->getParent());
+ if (F == callocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // calloc allocates arg0*arg1 bytes.
+ return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
+ SE->getSCEV(CS.getArgument(1))),
+ L);
+ } else if (F == reallocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // realloc allocates arg1 bytes.
+ return SE->getSCEVAtScope(CS.getArgument(1), L);
+ }
+ }
+
+ return SE->getCouldNotCompute();
+}
+
+// Calculates the number of elements of type Ty allocated for P.
+const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
+ const Type *Ty)
+ const {
+ const Type *elementTy;
+ const SCEV *Count = computeAllocationCount(P, elementTy);
+ if (isa<SCEVCouldNotCompute>(Count))
+ return Count;
+ if (elementTy == Ty)
+ return Count;
+
+ if (!TD) // need TargetData from this point forward
+ return SE->getCouldNotCompute();
+
+ uint64_t elementSize = TD->getTypeAllocSize(elementTy);
+ uint64_t wantSize = TD->getTypeAllocSize(Ty);
+ if (elementSize == wantSize)
+ return Count;
+ if (elementSize % wantSize) //fractional counts not possible
+ return SE->getCouldNotCompute();
+ return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
+ elementSize/wantSize));
+}
+
+const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
+ // We only deal with pointers.
+ const PointerType *PTy = cast<PointerType>(V->getType());
+ return computeAllocationCountForType(V, PTy->getElementType());
+}
+
+const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
+ return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
+}
+
+// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too
+enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
+ Predicate Pred,
+ const SCEV *A,
+ const SCEV *B) const {
+ if (SE->isLoopGuardedByCond(L, Pred, A, B))
+ return AlwaysTrue;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ return AlwaysTrue;
+
+ Pred = ICmpInst::getInversePredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, B, A))
+ return AlwaysFalse;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ if (SE->isLoopGuardedByCond(L, Pred, A, B))
+ return AlwaysTrue;
+ return Unknown;
+}
+
+enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
+ const SCEV *Limit,
+ BasicBlock *BB)
+{
+ //FIXME: merge implementation
+ return Unknown;
+}
+
+void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
+ const SCEV *&Limit,
+ const SCEV *&Offset) const
+{
+ Pointer = Pointer->stripPointerCasts();
+ Base = Pointer->getUnderlyingObject();
+ Limit = getAllocationSizeInBytes(Base);
+ if (isa<SCEVCouldNotCompute>(Limit)) {
+ Base = 0;
+ Offset = Limit;
+ return;
+ }
+
+ Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
+ if (isa<SCEVCouldNotCompute>(Offset)) {
+ Base = 0;
+ Limit = Offset;
+ }
+}
+
+void PointerTracking::print(raw_ostream &OS, const Module* M) const {
+  // Calling some PT methods may cause caches to be updated; however,
+  // this should be safe for the same reason it's safe for SCEV.
+ PointerTracking &PT = *const_cast<PointerTracking*>(this);
+ for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
+ if (!isa<PointerType>(I->getType()))
+ continue;
+ Value *Base;
+ const SCEV *Limit, *Offset;
+ getPointerOffset(&*I, Base, Limit, Offset);
+ if (!Base)
+ continue;
+
+ if (Base == &*I) {
+ const SCEV *S = getAllocationElementCount(Base);
+ OS << *Base << " ==> " << *S << " elements, ";
+ OS << *Limit << " bytes allocated\n";
+ continue;
+ }
+ OS << &*I << " -- base: " << *Base;
+ OS << " offset: " << *Offset;
+
+ enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
+ switch (res) {
+ case AlwaysTrue:
+ OS << " always safe\n";
+ break;
+ case AlwaysFalse:
+ OS << " always unsafe\n";
+ break;
+ case Unknown:
+ OS << " <<unknown>>\n";
+ break;
+ }
+ }
+}
+
+static RegisterPass<PointerTracking> X("pointertracking",
+ "Track pointer bounds", false, true);
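A sketch of a hypothetical client pass querying PointerTracking; BoundsReport and its output format are illustrative, and the pass registration boilerplate is omitted. Only the PointerTracking calls come from the code above:

#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  struct BoundsReport : public FunctionPass {
    static char ID;
    BoundsReport() : FunctionPass(&ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<PointerTracking>();
      AU.setPreservesAll();
    }

    // Report the element count for every pointer-typed value PT can bound.
    virtual bool runOnFunction(Function &F) {
      PointerTracking &PT = getAnalysis<PointerTracking>();
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
        if (!isa<PointerType>(I->getType()))
          continue;
        const SCEV *Elts = PT.getAllocationElementCount(&*I);
        if (!isa<SCEVCouldNotCompute>(Elts))
          errs() << *I << " ==> " << *Elts << " elements\n";
      }
      return false;
    }
  };
}
char BoundsReport::ID = 0;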
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 4853c2a..69d6b47 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -33,15 +33,19 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true);
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
- DEBUG(DT->dump());
+ DEBUG(DT->print(errs()));
return false;
}
-PostDominatorTree::~PostDominatorTree()
-{
+PostDominatorTree::~PostDominatorTree() {
delete DT;
}
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
FunctionPass* llvm::createPostDomTree() {
return new PostDominatorTree();
}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 0000000..c585c1d
--- /dev/null
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,310 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class VISIBILITY_HIDDEN ProfileEstimatorPass :
+ public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(&ID), ExecCount(execcount) {
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+ /// run - Estimate the profile information from the specified file.
+ virtual bool runOnFunction(Function &F);
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+static RegisterPass<ProfileEstimatorPass>
+X("profile-estimator", "Estimate profiling information", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+namespace llvm {
+ const PassInfo *ProfileEstimatorPassID = &X;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(errs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(errs() << "-- Weight of Edge " << E << ":"
+ << format("%g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must equal the block weight, which must in
+// turn equal the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+ // If this block is a loop header and the predecessor is contained in this
+    // loop, then the edge is a backedge; continue and do not check whether
+    // the value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continueing");
+ continue;
+ }
+    // If the edge's value is missing (and this is neither a loop header nor
+    // a backedge), return; this block is currently not estimatable.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+  // Exactly the flow that is entering this block must exit this block too. So
+ // do the following:
+ // *) get all the exit edges, read the flow that is already leaving this
+ // loop, remember the edges that do not have any flow on them right now.
+  // (The edges that already have flow on them are most likely exiting edges
+  // of other loops; do not touch those flows because the previously
+  // calculated loop headers would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+ // Distribute remaining weight onto the exit edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+ printEdgeWeight(*ei);
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop-exiting edges; now we have a
+  // definite block weight and must distribute it onto the outgoing edges.
+  // Since there may already be flow attached to some of the edges, read this
+  // flow first and remember the edges that still have no flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ }
+ }
+ }
+
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size();
+ printEdgeWeight(*ei);
+ }
+
+  // This block is now visited; mark it as such before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ DEBUG(errs() << "Working on function " << F.getNameStr() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting weight of 1.
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = 1;
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = 1;
+ printEdgeWeight(edge);
+
+  // Since recurseBasicBlock() may return with a block that was not fully
+  // estimated, keep calling recurseBasicBlock() until everything is calculated.
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0) {
+    // Remember the number of open blocks; this is later used to check whether
+    // progress was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+      // If at least one block was finished, break because the iterator may
+      // be invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+    // If not a single block was resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ BasicBlock *BB = *(BBToVisit.begin());
+ // Since this BB was not calculated because of missing incoming edges,
+ // set these edges to zero.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ EdgeInformation[&F][e] = 0;
+ DEBUG(errs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ }
+ }
+ }
+ }
+
+ return false;
+}
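A tiny numeric check of the flow condition the estimator maintains, using a single self-looping header entered once and iterated ten times (matching the default -profile-estimator-loop-weight of 10); the integer edge encoding is illustrative, not the pass's representation:

#include <cassert>
#include <map>
#include <utility>

typedef std::pair<int, int> Edge; // (pred, succ); 0 is the virtual entry/exit

int main() {
  std::map<Edge, double> W;
  W[Edge(0, 1)] = 1;  // virtual entry edge into the header
  W[Edge(1, 1)] = 10; // backedge, carries the estimated loop iterations
  W[Edge(1, 0)] = 1;  // loop exit; everything that enters must leave
  double in  = W[Edge(0, 1)] + W[Edge(1, 1)];
  double out = W[Edge(1, 1)] + W[Edge(1, 0)];
  assert(in == out && in == 11); // the block weight equals both sums
  return 0;
}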
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index a0965b6..9efdd23 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -17,6 +17,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
#include <set>
using namespace llvm;
@@ -26,56 +29,149 @@ char ProfileInfo::ID = 0;
ProfileInfo::~ProfileInfo() {}
-unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const {
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+const double ProfileInfo::MissingValue = -1;
+
+double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
// Are there zero predecessors of this block?
if (PI == PE) {
// If this is the entry block, look for the Null -> Entry edge.
if (BB == &BB->getParent()->getEntryBlock())
- return getEdgeWeight(0, BB);
+ return getEdgeWeight(getEdge(0, BB));
else
return 0; // Otherwise, this is a dead block.
}
// Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges. Note that if
- // there are multiple edges from a predecessor to this block that we don't
- // want to count its weight multiple times. For this reason, we keep track of
- // the predecessors we've seen and only count them if we haven't run into them
- // yet.
- //
- // We don't want to create an std::set unless we are dealing with a block that
- // has a LARGE number of in-edges. Handle the common case of having only a
- // few in-edges with special code.
- //
- BasicBlock *FirstPred = *PI;
- unsigned Count = getEdgeWeight(FirstPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for single predecessor blocks
-
- BasicBlock *SecondPred = *PI;
- if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for two predecessor blocks
-
- BasicBlock *ThirdPred = *PI;
- if (ThirdPred != FirstPred && ThirdPred != SecondPred)
- Count += getEdgeWeight(ThirdPred, BB);
- ++PI;
- if (PI == PE) return Count; // Quick exit for three predecessor blocks
-
- std::set<BasicBlock*> ProcessedPreds;
- ProcessedPreds.insert(FirstPred);
- ProcessedPreds.insert(SecondPred);
- ProcessedPreds.insert(ThirdPred);
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ double Count = 0;
for (; PI != PE; ++PI)
- if (ProcessedPreds.insert(*PI).second)
- Count += getEdgeWeight(*PI, BB);
+ if (ProcessedPreds.insert(*PI).second) {
+ double w = getEdgeWeight(getEdge(*PI, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+double ProfileInfo::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+  // isDeclaration() is checked here and not at the start of the function to
+  // allow functions without a body to still have an execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
return Count;
}
+/// Replaces all occurrences of RmBB in the ProfileInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
+ const BasicBlock *DestBB) {
+ DEBUG(errs() << "Replacing " << RmBB->getNameStr()
+ << " with " << DestBB->getNameStr() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ I != E; ++I) {
+ Edge e = I->first;
+ Edge newedge; bool foundedge = false;
+ if (e.first == RmBB) {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ if (e.second == RmBB) {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ if (foundedge) {
+ double w = getEdgeWeight(e);
+ EdgeInformation[F][newedge] = w;
+ DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n");
+ J->second.erase(e);
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it's possible that there is more than one edge in the CFG from FirstBB
+/// to SecondBB, it's necessary to redirect the flow proportionally.
+void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB; if there is more than
+    // one, only slice out a proportional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+ // When the edges are merged anyway, then redirect all flow.
+ succ_count = 1;
+ }
+  // We now know how many edges there are from FirstBB to SecondBB; reroute a
+ // proportional part of the edge weight over NewBB.
+ double neww = w / succ_count;
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) {
+ O << "(";
+ O << (E.first ? E.first->getNameStr() : "0");
+ O << ",";
+ O << (E.second ? E.second->getNameStr() : "0");
+ return O << ")";
+}
//===----------------------------------------------------------------------===//
// NoProfile ProfileInfo implementation
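The proportional rerouting in splitEdge is easiest to see with numbers: with two parallel (FirstBB, SecondBB) edges carrying a combined weight of 6, splitting one of them routes w/2 = 3 over NewBB and leaves 3 on the untouched edge. A sketch of just that arithmetic (hedged; the real method also updates BlockInformation and erases the old edge when succ_count is 1):

#include <cassert>

int main() {
  double w = 6;       // weight currently on the (FirstBB, SecondBB) edge
  int succ_count = 2; // FirstBB branches to SecondBB twice
  double neww = w / succ_count;
  double n1 = neww, n2 = neww; // (FirstBB, NewBB) and (NewBB, SecondBB)
  w -= neww;                   // flow left on the parallel edge
  assert(n1 == 3 && n2 == 3 && w == 3);
  return 0;
}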
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index adb2bdc..25481b2 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -16,7 +16,7 @@
#include "llvm/Analysis/ProfileInfoTypes.h"
#include "llvm/Module.h"
#include "llvm/InstrTypes.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <cstdlib>
#include <map>
@@ -26,10 +26,17 @@ using namespace llvm;
//
static inline unsigned ByteSwap(unsigned Var, bool Really) {
if (!Really) return Var;
- return ((Var & (255<< 0)) << 24) |
- ((Var & (255<< 8)) << 8) |
- ((Var & (255<<16)) >> 8) |
- ((Var & (255<<24)) >> 24);
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
}
static void ReadProfilingBlock(const char *ToolName, FILE *F,
@@ -38,7 +45,7 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
// Read the number of entries...
unsigned NumEntries;
if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- cerr << ToolName << ": data packet truncated!\n";
+ errs() << ToolName << ": data packet truncated!\n";
perror(0);
exit(1);
}
@@ -49,35 +56,41 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
// Read in the block of data...
if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- cerr << ToolName << ": data packet truncated!\n";
+ errs() << ToolName << ": data packet truncated!\n";
perror(0);
exit(1);
}
- // Make sure we have enough space...
+  // Make sure we have enough space... The space is initialized to -1 to
+  // facilitate the loading of missing values for OptimalEdgeProfiling.
if (Data.size() < NumEntries)
- Data.resize(NumEntries);
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
// Accumulate the data we just read into the data.
if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i)
- Data[i] += TempSpace[i];
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
} else {
- for (unsigned i = 0; i != NumEntries; ++i)
- Data[i] += ByteSwap(TempSpace[i], true);
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
}
}
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
// program if the file is invalid or broken.
//
ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
const std::string &Filename,
- Module &TheModule) :
- M(TheModule), Warned(false) {
- FILE *F = fopen(Filename.c_str(), "r");
+ Module &TheModule) :
+ Filename(Filename),
+ M(TheModule), Warned(false) {
+ FILE *F = fopen(Filename.c_str(), "rb");
if (F == 0) {
- cerr << ToolName << ": Error opening '" << Filename << "': ";
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
perror(0);
exit(1);
}
@@ -95,7 +108,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
case ArgumentInfo: {
unsigned ArgLength;
if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- cerr << ToolName << ": arguments packet truncated!\n";
+ errs() << ToolName << ": arguments packet truncated!\n";
perror(0);
exit(1);
}
@@ -106,7 +119,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
if (ArgLength)
if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- cerr << ToolName << ": arguments packet truncated!\n";
+ errs() << ToolName << ": arguments packet truncated!\n";
perror(0);
exit(1);
}
@@ -126,12 +139,16 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
break;
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
case BBTraceInfo:
ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
break;
default:
- cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
exit(1);
}
}
@@ -139,139 +156,3 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
fclose(F);
}
-
-// getFunctionCounts - This method is used by consumers of function counting
-// information. If we do not directly have function count information, we
-// compute it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getFunctionCounts(std::vector<std::pair<Function*,
- unsigned> > &Counts) {
- if (FunctionCounts.empty()) {
- if (hasAccurateBlockCounts()) {
- // Synthesize function frequency information from the number of times
- // their entry blocks were executed.
- std::vector<std::pair<BasicBlock*, unsigned> > BlockCounts;
- getBlockCounts(BlockCounts);
-
- for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i)
- if (&BlockCounts[i].first->getParent()->getEntryBlock() ==
- BlockCounts[i].first)
- Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(),
- BlockCounts[i].second));
- } else {
- cerr << "Function counts are not available!\n";
- }
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator I = M.begin(), E = M.end();
- I != E && Counter != FunctionCounts.size(); ++I)
- if (!I->isDeclaration())
- Counts.push_back(std::make_pair(I, FunctionCounts[Counter++]));
-}
-
-// getBlockCounts - This method is used by consumers of block counting
-// information. If we do not directly have block count information, we
-// compute it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*,
- unsigned> > &Counts) {
- if (BlockCounts.empty()) {
- if (hasAccurateEdgeCounts()) {
- // Synthesize block count information from edge frequency information.
- // The block execution frequency is equal to the sum of the execution
- // frequency of all outgoing edges from a block.
- //
- // If a block has no successors, this will not be correct, so we have to
- // special case it. :(
- std::vector<std::pair<Edge, unsigned> > EdgeCounts;
- getEdgeCounts(EdgeCounts);
-
- std::map<BasicBlock*, unsigned> InEdgeFreqs;
-
- BasicBlock *LastBlock = 0;
- TerminatorInst *TI = 0;
- for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) {
- if (EdgeCounts[i].first.first != LastBlock) {
- LastBlock = EdgeCounts[i].first.first;
- TI = LastBlock->getTerminator();
- Counts.push_back(std::make_pair(LastBlock, 0));
- }
- Counts.back().second += EdgeCounts[i].second;
- unsigned SuccNum = EdgeCounts[i].first.second;
- if (SuccNum >= TI->getNumSuccessors()) {
- if (!Warned) {
- cerr << "WARNING: profile info doesn't seem to match"
- << " the program!\n";
- Warned = true;
- }
- } else {
- // If this successor has no successors of its own, we will never
- // compute an execution count for that block. Remember the incoming
- // edge frequencies to add later.
- BasicBlock *Succ = TI->getSuccessor(SuccNum);
- if (Succ->getTerminator()->getNumSuccessors() == 0)
- InEdgeFreqs[Succ] += EdgeCounts[i].second;
- }
- }
-
- // Now we have to accumulate information for those blocks without
- // successors into our table.
- for (std::map<BasicBlock*, unsigned>::iterator I = InEdgeFreqs.begin(),
- E = InEdgeFreqs.end(); I != E; ++I) {
- unsigned i = 0;
- for (; i != Counts.size() && Counts[i].first != I->first; ++i)
- /*empty*/;
- if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0));
- Counts[i].second += I->second;
- }
-
- } else {
- cerr << "Block counts are not available!\n";
- }
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- Counts.push_back(std::make_pair(BB, BlockCounts[Counter++]));
- if (Counter == BlockCounts.size())
- return;
- }
-}
-
-// getEdgeCounts - This method is used by consumers of edge counting
-// information. If we do not directly have edge count information, we compute
-// it from other, more refined, types of profile information.
-//
-void ProfileInfoLoader::getEdgeCounts(std::vector<std::pair<Edge,
- unsigned> > &Counts) {
- if (EdgeCounts.empty()) {
- cerr << "Edge counts not available, and no synthesis "
- << "is implemented yet!\n";
- return;
- }
-
- unsigned Counter = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors();
- i != e; ++i) {
- Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++]));
- if (Counter == EdgeCounts.size())
- return;
- }
-}
-
-// getBBTrace - This method is used by consumers of basic-block trace
-// information.
-//
-void ProfileInfoLoader::getBBTrace(std::vector<BasicBlock *> &Trace) {
- if (BBTrace.empty ()) {
- cerr << "Basic block trace is not available!\n";
- return;
- }
- cerr << "Basic block trace loading is not implemented yet!\n";
-}
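The Uncounted sentinel introduced above keeps a merge of several profile runs honest: ~0U means "no sample", so summation only happens when both runs actually counted the edge. A standalone sketch of that merge rule (addCounts mirrors the static AddCounts helper):

#include <cassert>

static const unsigned Uncounted = ~0U;

// Merge two per-edge counters, preserving "missing" when neither run
// produced a value and passing through the defined one otherwise.
static unsigned addCounts(unsigned A, unsigned B) {
  if (A == Uncounted) return B;
  if (B == Uncounted) return A;
  return A + B;
}

int main() {
  assert(addCounts(Uncounted, 5) == 5);
  assert(addCounts(7, Uncounted) == 7);
  assert(addCounts(2, 3) == 5);
  assert(addCounts(Uncounted, Uncounted) == Uncounted);
  return 0;
}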
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 0a8a87b..89d90bc 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -11,18 +11,27 @@
// loads the information from a profile dump file.
//
//===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "profile-loader"
#include "llvm/BasicBlock.h"
#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
using namespace llvm;
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
static cl::opt<std::string>
ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
cl::value_desc("filename"),
@@ -31,6 +40,9 @@ ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
namespace {
class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo {
std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
public:
static char ID; // Class identification, replacement for typeinfo
explicit LoaderPass(const std::string &filename = "")
@@ -46,6 +58,12 @@ namespace {
return "Profiling information loader";
}
+  // recurseBasicBlock() - Calculates the edge weights for as many basic
+  // blocks as possible.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, unsigned &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
/// run - Load the profile information from the specified file.
virtual bool runOnModule(Module &M);
};
@@ -66,25 +84,210 @@ Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
return new LoaderPass(Filename);
}
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, unsigned &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+  // Collect the weights of all incoming and outgoing edges; remember edges
+  // that have no value.
+ unsigned incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount);
+ }
+ }
+
+ unsigned outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount);
+ }
+ }
+
+  // If exactly one edge weight was missing, calculate it and remove it from
+  // the spanning tree.
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%g", getEdgeWeight(edgetocalc)) << "\n");
+ SpanningTree.erase(edgetocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that is representable in unsigned is also
+      // representable in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(errs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
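
(Aside, not part of the patch: the conversion comment above holds because a
double's 53-bit mantissa exactly represents every 32-bit counter; a minimal
standalone check, assuming the file stores 32-bit counts:)

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Max = 0xFFFFFFFFu;           // largest assumed counter value
      assert((uint32_t)(double)Max == Max); // round-trips exactly through double
      return 0;
    }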
+
bool LoaderPass::runOnModule(Module &M) {
ProfileInfoLoader PIL("profile-loader", Filename, M);
- EdgeCounts.clear();
- bool PrintedWarning = false;
-
- std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > ECs;
- PIL.getEdgeCounts(ECs);
- for (unsigned i = 0, e = ECs.size(); i != e; ++i) {
- BasicBlock *BB = ECs[i].first.first;
- unsigned SuccNum = ECs[i].first.second;
- TerminatorInst *TI = BB->getTerminator();
- if (SuccNum >= TI->getNumSuccessors()) {
- if (!PrintedWarning) {
- cerr << "WARNING: profile information is inconsistent with "
+
+ EdgeInformation.clear();
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
<< "the current program!\n";
- PrintedWarning = true;
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
}
- } else {
- EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))]+= ECs[i].second;
+ while (SpanningTree.size() > 0) {
+#if 0
+ unsigned size = SpanningTree.size();
+#endif
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+#if 0
+ if (SpanningTree.size() == size) {
+ DEBUG(errs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<","
+ <<(ei->second?ei->second->getName():"0")<<"),");
+ }
+ assert(0 && "No edge calculated!");
+ }
+#endif
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+        // Here the data realm changes from the unsigned of the file to the
+        // double of the ProfileInfo. This conversion is safe because we know
+        // that everything that is representable in unsigned is also
+        // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that is representable in unsigned is also
+      // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
}
}
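
The loader above leans on flow conservation: at every block, the incoming
edge counts sum to the outgoing ones, so whenever exactly one incident edge
is uninstrumented its weight is forced; that is what recurseBasicBlock
exploits while draining the spanning tree. A standalone sketch of the rule
(solveMissingEdge is a hypothetical helper, not LLVM API):

    #include <cassert>
    #include <vector>

    // Weight of the single uninstrumented edge at a block, given the known
    // incoming and outgoing edge weights.
    static unsigned solveMissingEdge(const std::vector<unsigned> &In,
                                     const std::vector<unsigned> &Out) {
      unsigned InSum = 0, OutSum = 0;
      for (unsigned i = 0, e = In.size(); i != e; ++i)  InSum  += In[i];
      for (unsigned i = 0, e = Out.size(); i != e; ++i) OutSum += Out[i];
      return InSum > OutSum ? InSum - OutSum : OutSum - InSum;
    }

    int main() {
      unsigned InArr[]  = { 10, 5 }; // two instrumented incoming edges
      unsigned OutArr[] = { 12 };    // one instrumented outgoing edge
      std::vector<unsigned> In(InArr, InArr + 2), Out(OutArr, OutArr + 1);
      assert(solveMissingEdge(In, Out) == 3); // the uncounted edge ran 3 times
      return 0;
    }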
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 0000000..9766da5
--- /dev/null
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,343 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to verify profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace {
+ class VISIBILITY_HIDDEN ProfileVerifierPass : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BasicBlock *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfo *PI;
+ std::set<const BasicBlock*> BBisVisited;
+ std::set<const Function*> FisVisited;
+ bool DisableAssertions;
+
+    // When debugging is enabled, the verifier prints a whole slew of debug
+    // information; otherwise it just asserts. These are the helper functions
+    // for that output.
+ bool PrintedDebugTree;
+ std::set<const BasicBlock*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BasicBlock *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPass () : FunctionPass(&ID) {
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPass (bool da) : FunctionPass(&ID),
+ DisableAssertions(da) {
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfo>();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(Function &F);
+ void recurseBasicBlock(const BasicBlock*);
+
+ bool exitReachable(const Function*);
+ double ReadOrAssert(ProfileInfo::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+} // End of anonymous namespace
+
+char ProfileVerifierPass::ID = 0;
+static RegisterPass<ProfileVerifierPass>
+X("profile-verifier", "Verify profiling information", false, true);
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
+void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BasicBlock*> ProcessedPreds;
+ for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ ProfileInfo::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ ProfileInfo::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr()
+ <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount
+ <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+}
+
+void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) {
+ errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":";
+ errs() << "BBWeight=" << DI->BBWeight << ",";
+ errs() << "inWeight=" << DI->inWeight << ",";
+ errs() << "inCount=" << DI->inCount << ",";
+ errs() << "outWeight=" << DI->outWeight << ",";
+ errs() << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+}
+
+// Compares A and B, tolerating small relative differences.
+static bool Equals(double A, double B) {
+ double maxRelativeError = 0.0000001;
+ if (A == B)
+ return true;
+ double relativeError;
+ if (fabs(B) > fabs(A))
+ relativeError = fabs((A - B) / B);
+ else
+ relativeError = fabs((A - B) / A);
+ if (relativeError <= maxRelativeError) return true;
+ return false;
+}
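
Because edge weights are accumulated as doubles, exact equality would be too
strict; the comparison above accepts a 1e-7 relative error. A self-contained
usage sketch (the values are made up, and Equals is repeated here so the
snippet compiles alone):

    #include <cassert>
    #include <cmath>

    static bool Equals(double A, double B) {
      double maxRelativeError = 0.0000001;
      if (A == B) return true;
      double relativeError = (fabs(B) > fabs(A)) ? fabs((A - B) / B)
                                                 : fabs((A - B) / A);
      return relativeError <= maxRelativeError;
    }

    int main() {
      assert(Equals(1000000.0, 1000000.00000001)); // ~1e-14 relative error
      assert(!Equals(100.0, 101.0));               // ~1% is a real mismatch
      return 0;
    }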
+
+// This checks if the function "exit" is reachable from an given function
+// via calls, this is necessary to check if a profile is valid despite the
+// counts not fitting exactly.
+bool ProfileVerifierPass::exitReachable(const Function *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ Function *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ exits |= exitReachable(CI->getCalledFunction());
+ if (exits) break;
+ }
+ }
+ return exits;
+}
+
+#define ASSERTMESSAGE(M) \
+ errs() << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M));
+
+double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfo::MissingValue) {
+ errs() << "Edge " << E << " in Function "
+ << ProfileInfo::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("ASSERT:Edge has missing value");
+ return 0;
+ } else {
+ return EdgeWeight;
+ }
+}
+
+void ProfileVerifierPass::CheckValue(bool Error, const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ errs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+}
+
+// Calculates the information for a block and then recurses into its
+// successors.
+void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+  // Use a data structure to store all the information; this can then be
+  // handed to the debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BasicBlock*> ProcessedPreds;
+ pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // If there is a (BB,0) exit edge, consider it too. (This is done every
+  // time, not only when there are no successors, because not every function
+  // returns from a block without successors; think of a loop latch used as
+  // the return block.)
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfo::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfo::MissingValue,
+ "ASSERT:BasicBlock has missing value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ Function *F = CI->getCalledFunction();
+ if (F && (F->getNameStr() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(CI->getCalledFunction());
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "ASSERT:inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "ASSERT:BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "ASSERT:inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "ASSERT:inWeight and outWeight do not match", &DI);
+ }
+
+
+  // Mark this block as visited, then recurse into the successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileVerifierPass::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BasicBlock *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (!DisableAssertions)
+ assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) &&
+ "Function count and entry block count do not match");
+ return false;
+}
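
The net effect of recurseBasicBlock above is a per-block flow-conservation
check: inWeight, BBWeight, and outWeight must agree up to Equals' tolerance,
except where flow may legitimately vanish (a call that can reach exit) or
appear (a _setjmp target). A small arithmetic sketch on an assumed diamond
CFG, not taken from any real profile:

    #include <cassert>

    int main() {
      // Assumed profile: entry runs 10 times and branches 7/3; both arms
      // rejoin at merge, which leaves the function via its (merge,0) edge.
      double InThen = 7, InElse = 3;
      double InMerge  = InThen + InElse; // inWeight(merge)  = 10
      double BBMerge  = 10;              // recorded block count
      double OutMerge = 10;              // weight of the (merge,0) exit edge
      assert(InMerge == BBMerge && BBMerge == OutMerge); // verifier is happy
      // Were InThen recorded as 6, inWeight (9) would disagree with
      // outWeight (10) and CheckValue would fire one of the asserts above.
      return 0;
    }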
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
new file mode 100644
index 0000000..c401090
--- /dev/null
+++ b/lib/Analysis/README.txt
@@ -0,0 +1,18 @@
+Analysis Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the
+ScalarEvolution expression for %r is this:
+
+ {1,+,3,+,2}<loop>
+
+Outside the loop, this could be evaluated simply as (%n * %n), however
+ScalarEvolution currently evaluates it as
+
+ (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n))
+
+In addition to being much more complicated, it involves i65 arithmetic,
+which is very inefficient when expanded into code.
+
+//===---------------------------------------------------------------------===//
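
A quick sanity check of the simple form, assuming the loop body runs %n
times: a chrec {C0,+,C1,+,C2} evaluates at iteration i to
C0 + C1*BC(i,1) + C2*BC(i,2), so {1,+,3,+,2} gives

    1 + 3*i + 2*(i*(i-1)/2)  =  i^2 + 2*i + 1  =  (i+1)^2

and the last value, f(%n - 1), is exactly %n * %n.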
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 4081562..62f3aa1 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -14,9 +14,8 @@
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
-// can handle. These classes are reference counted, managed by the const SCEV*
-// class. We only create one SCEV of a particular shape, so pointer-comparisons
-// for equality are legal.
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
@@ -64,7 +63,10 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -74,12 +76,14 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;
@@ -118,11 +122,6 @@ void SCEV::dump() const {
errs() << '\n';
}
-void SCEV::print(std::ostream &o) const {
- raw_os_ostream OS(o);
- print(OS);
-}
-
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
@@ -142,33 +141,26 @@ bool SCEV::isAllOnesValue() const {
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
- SCEV(scCouldNotCompute) {}
-
-void SCEVCouldNotCompute::Profile(FoldingSetNodeID &ID) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
-}
+ SCEV(FoldingSetNodeID(), scCouldNotCompute) {}
bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return false;
}
const Type *SCEVCouldNotCompute::getType() const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return 0;
}
bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
- assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
return false;
}
-const SCEV *
-SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete(
- const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- return this;
+bool SCEVCouldNotCompute::hasOperand(const SCEV *) const {
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
}
void SCEVCouldNotCompute::print(raw_ostream &OS) const {
@@ -179,30 +171,26 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
-const SCEV* ScalarEvolution::getConstant(ConstantInt *V) {
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVConstant>();
- new (S) SCEVConstant(V);
+ new (S) SCEVConstant(ID, V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
-const SCEV* ScalarEvolution::getConstant(const APInt& Val) {
- return getConstant(ConstantInt::get(Val));
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
}
-const SCEV*
+const SCEV *
ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
- return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
-}
-
-void SCEVConstant::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scConstant);
- ID.AddPointer(V);
+ return getConstant(
+ ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
}
const Type *SCEVConstant::getType() const { return V->getType(); }
@@ -211,22 +199,21 @@ void SCEVConstant::print(raw_ostream &OS) const {
WriteAsOperand(OS, V, false);
}
-SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
- const SCEV* op, const Type *ty)
- : SCEV(SCEVTy), Op(op), Ty(ty) {}
-
-void SCEVCastExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getSCEVType());
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
-}
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return Op->dominates(BB, DT);
}
-SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scTruncate, op, ty) {
+bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return Op->properlyDominates(BB, DT);
+}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate non-integer value!");
@@ -236,8 +223,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}
-SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scZeroExtend, op, ty) {
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot zero extend non-integer value!");
@@ -247,8 +235,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
}
-SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty)
- : SCEVCastExpr(scSignExtend, op, ty) {
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot sign extend non-integer value!");
@@ -267,46 +256,6 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
OS << ")";
}
-const SCEV *
-SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete(
- const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const SCEV* H =
- getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H != getOperand(i)) {
- SmallVector<const SCEV*, 8> NewOps;
- NewOps.reserve(getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- NewOps.push_back(getOperand(j));
- NewOps.push_back(H);
- for (++i; i != e; ++i)
- NewOps.push_back(getOperand(i)->
- replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
-
- if (isa<SCEVAddExpr>(this))
- return SE.getAddExpr(NewOps);
- else if (isa<SCEVMulExpr>(this))
- return SE.getMulExpr(NewOps);
- else if (isa<SCEVSMaxExpr>(this))
- return SE.getSMaxExpr(NewOps);
- else if (isa<SCEVUMaxExpr>(this))
- return SE.getUMaxExpr(NewOps);
- else
- assert(0 && "Unknown commutative expr!");
- }
- }
- return this;
-}
-
-void SCEVNAryExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getSCEVType());
- ID.AddInteger(Operands.size());
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
-}
-
bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (!getOperand(i)->dominates(BB, DT))
@@ -315,16 +264,22 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
-void SCEVUDivExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scUDivExpr);
- ID.AddPointer(LHS);
- ID.AddPointer(RHS);
+bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!getOperand(i)->properlyDominates(BB, DT))
+ return false;
+ }
+ return true;
}
bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
}
+bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT);
+}
+
void SCEVUDivExpr::print(raw_ostream &OS) const {
OS << "(" << *LHS << " /u " << *RHS << ")";
}
@@ -338,38 +293,6 @@ const Type *SCEVUDivExpr::getType() const {
return RHS->getType();
}
-void SCEVAddRecExpr::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scAddRecExpr);
- ID.AddInteger(Operands.size());
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
- ID.AddPointer(L);
-}
-
-const SCEV *
-SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym,
- const SCEV *Conc,
- ScalarEvolution &SE) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const SCEV* H =
- getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
- if (H != getOperand(i)) {
- SmallVector<const SCEV*, 8> NewOps;
- NewOps.reserve(getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- NewOps.push_back(getOperand(j));
- NewOps.push_back(H);
- for (++i; i != e; ++i)
- NewOps.push_back(getOperand(i)->
- replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
-
- return SE.getAddRecExpr(NewOps, L);
- }
- }
- return this;
-}
-
-
bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
// Add recurrences are never invariant in the function-body (null loop).
if (!QueryLoop)
@@ -396,9 +319,13 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << "}<" << L->getHeader()->getName() + ">";
}
-void SCEVUnknown::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(scUnknown);
- ID.AddPointer(V);
+void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
+ // LLVM struct fields don't have names, so just print the field number.
+ OS << "offsetof(" << *STy << ", " << FieldNo << ")";
+}
+
+void SCEVAllocSizeExpr::print(raw_ostream &OS) const {
+ OS << "sizeof(" << *AllocTy << ")";
}
bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
@@ -417,6 +344,12 @@ bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
return true;
}
+bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ if (Instruction *I = dyn_cast<Instruction>(getValue()))
+ return DT->properlyDominates(I->getParent(), BB);
+ return true;
+}
+
const Type *SCEVUnknown::getType() const {
return V->getType();
}
@@ -429,6 +362,41 @@ void SCEVUnknown::print(raw_ostream &OS) const {
// SCEV Utilities
//===----------------------------------------------------------------------===//
+static bool CompareTypes(const Type *A, const Type *B) {
+ if (A->getTypeID() != B->getTypeID())
+ return A->getTypeID() < B->getTypeID();
+ if (const IntegerType *AI = dyn_cast<IntegerType>(A)) {
+ const IntegerType *BI = cast<IntegerType>(B);
+ return AI->getBitWidth() < BI->getBitWidth();
+ }
+ if (const PointerType *AI = dyn_cast<PointerType>(A)) {
+ const PointerType *BI = cast<PointerType>(B);
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const ArrayType *AI = dyn_cast<ArrayType>(A)) {
+ const ArrayType *BI = cast<ArrayType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const VectorType *AI = dyn_cast<VectorType>(A)) {
+ const VectorType *BI = cast<VectorType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ return CompareTypes(AI->getElementType(), BI->getElementType());
+ }
+ if (const StructType *AI = dyn_cast<StructType>(A)) {
+ const StructType *BI = cast<StructType>(B);
+ if (AI->getNumElements() != BI->getNumElements())
+ return AI->getNumElements() < BI->getNumElements();
+ for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i)
+ if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) ||
+ CompareTypes(BI->getElementType(i), AI->getElementType(i)))
+ return CompareTypes(AI->getElementType(i), BI->getElementType(i));
+ }
+ return false;
+}
+
namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
@@ -439,6 +407,10 @@ namespace {
explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return false;
+
// Primarily, sort the SCEVs by their getSCEVType().
if (LHS->getSCEVType() != RHS->getSCEVType())
return LHS->getSCEVType() < RHS->getSCEVType();
@@ -495,6 +467,8 @@ namespace {
// Compare constant values.
if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth())
+ return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth();
return LC->getValue()->getValue().ult(RC->getValue()->getValue());
}
@@ -539,7 +513,22 @@ namespace {
return operator()(LC->getOperand(), RC->getOperand());
}
- assert(0 && "Unknown SCEV kind!");
+ // Compare offsetof expressions.
+ if (const SCEVFieldOffsetExpr *LA = dyn_cast<SCEVFieldOffsetExpr>(LHS)) {
+ const SCEVFieldOffsetExpr *RA = cast<SCEVFieldOffsetExpr>(RHS);
+ if (CompareTypes(LA->getStructType(), RA->getStructType()) ||
+ CompareTypes(RA->getStructType(), LA->getStructType()))
+ return CompareTypes(LA->getStructType(), RA->getStructType());
+ return LA->getFieldNo() < RA->getFieldNo();
+ }
+
+ // Compare sizeof expressions by the allocation type.
+ if (const SCEVAllocSizeExpr *LA = dyn_cast<SCEVAllocSizeExpr>(LHS)) {
+ const SCEVAllocSizeExpr *RA = cast<SCEVAllocSizeExpr>(RHS);
+ return CompareTypes(LA->getAllocType(), RA->getAllocType());
+ }
+
+ llvm_unreachable("Unknown SCEV kind!");
return false;
}
};
@@ -555,7 +544,7 @@ namespace {
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
-static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops,
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
if (Ops.size() == 2) {
@@ -598,9 +587,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops,
/// BinomialCoefficient - Compute BC(It, K). The result has width W.
/// Assume, K > 0.
-static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
- ScalarEvolution &SE,
- const Type* ResultTy) {
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
@@ -690,16 +679,17 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
- const IntegerType *CalculationTy = IntegerType::get(CalculationBits);
- const SCEV* Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
- const SCEV* S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
+ const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
// Divide by 2^T
- const SCEV* DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
// Truncate the result, and divide by K! / 2^T.
@@ -716,14 +706,14 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
///
/// where BC(It, k) stands for binomial coefficient.
///
-const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It,
- ScalarEvolution &SE) const {
- const SCEV* Result = getStart();
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
// The computation is correct in the face of overflow provided that the
// multiplication is performed _after_ the evaluation of the binomial
// coefficient.
- const SCEV* Coeff = BinomialCoefficient(It, i, SE, getType());
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
if (isa<SCEVCouldNotCompute>(Coeff))
return Coeff;
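
evaluateAtIteration is the closed form f(It) = Op0 + Op1*BC(It,1) +
Op2*BC(It,2) + ... . A standalone numeric sketch of the same formula (plain
64-bit arithmetic; the real code picks a wider calculation type and checks
for overflow):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Binomial coefficient BC(It, K); valid here for It >= K.
    static uint64_t BC(uint64_t It, unsigned K) {
      uint64_t R = 1;
      for (unsigned i = 0; i != K; ++i)
        R = R * (It - i) / (i + 1); // stays integral at every step
      return R;
    }

    // Evaluate the addrec {Ops[0],+,Ops[1],+,...} at iteration It.
    static uint64_t evaluateAtIteration(const std::vector<uint64_t> &Ops,
                                        uint64_t It) {
      uint64_t Result = Ops[0];
      for (unsigned i = 1, e = Ops.size(); i != e; ++i)
        Result += Ops[i] * BC(It, i);
      return Result;
    }

    int main() {
      uint64_t Arr[] = { 1, 3, 2 }; // {1,+,3,+,2} from README.txt
      std::vector<uint64_t> Ops(Arr, Arr + 3);
      assert(evaluateAtIteration(Ops, 4) == 25); // (4+1)^2
      return 0;
    }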
@@ -736,14 +726,21 @@ const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It,
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
-const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
@@ -763,26 +760,23 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
return getAddRecExpr(Operands, AddRec->getLoop());
}
- FoldingSetNodeID ID;
- ID.AddInteger(scTruncate);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>();
- new (S) SCEVTruncateExpr(Op, Ty);
+ new (S) SCEVTruncateExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
-const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -801,12 +795,33 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->hasNoUnsignedWrap())
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
@@ -815,28 +830,25 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
- const SCEV* Start = AR->getStart();
- const SCEV* Step = AR->getStepRecurrence(*this);
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
- const SCEV* CastedMaxBECount =
+ const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV* RecastedMaxBECount =
+ const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy =
- IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV* ZMul =
+ const SCEV *ZMul =
getMulExpr(CastedMaxBECount,
getTruncateOrZeroExtend(Step, Start->getType()));
- const SCEV* Add = getAddExpr(Start, ZMul);
- const SCEV* OperandExtendedAdd =
+ const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
getZeroExtendExpr(Step, WideTy)));
@@ -844,11 +856,11 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
- const SCEV* SMul =
+ const SCEV *SMul =
getMulExpr(CastedMaxBECount,
getTruncateOrSignExtend(Step, Start->getType()));
Add = getAddExpr(Start, SMul);
@@ -860,25 +872,50 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) &&
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) ||
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
}
}
}
- FoldingSetNodeID ID;
- ID.AddInteger(scZeroExtend);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>();
- new (S) SCEVZeroExtendExpr(Op, Ty);
+ new (S) SCEVZeroExtendExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
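
The widening test above is the classic no-overflow trick: redo
Start + Step*MaxBECount with every operand zero-extended to twice the width
and compare against the zero-extended narrow result; the two agree exactly
when the narrow computation never wrapped. A standalone sketch at an assumed
8-bit width (noUnsignedWrap8 is a made-up name):

    #include <cassert>
    #include <cstdint>

    // True if Start + Step*BECount stays within 8 unsigned bits.
    static bool noUnsignedWrap8(uint8_t Start, uint8_t Step, uint8_t BECount) {
      uint8_t  Narrow = (uint8_t)(Start + Step * BECount); // modular result
      uint16_t Wide   = (uint16_t)Start + (uint16_t)Step * BECount;
      return (uint16_t)Narrow == Wide; // equal iff nothing wrapped
    }

    int main() {
      assert(noUnsignedWrap8(0, 1, 99));    // 0 + 1*99 = 99 fits in i8
      assert(!noUnsignedWrap8(200, 1, 99)); // 200 + 99 = 299 wraps in i8
      return 0;
    }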
-const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -897,12 +934,33 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->hasNoSignedWrap())
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
+
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
@@ -911,28 +969,25 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
- const SCEV* Start = AR->getStart();
- const SCEV* Step = AR->getStepRecurrence(*this);
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
- const SCEV* CastedMaxBECount =
+ const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV* RecastedMaxBECount =
+ const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy =
- IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV* SMul =
+ const SCEV *SMul =
getMulExpr(CastedMaxBECount,
getTruncateOrSignExtend(Step, Start->getType()));
- const SCEV* Add = getAddExpr(Start, SMul);
- const SCEV* OperandExtendedAdd =
+ const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
getSignExtendExpr(Step, WideTy)));
@@ -940,19 +995,60 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(getSignExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- AR->getLoop());
+ L);
+
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ const SCEV *UMul =
+ getMulExpr(CastedMaxBECount,
+ getTruncateOrZeroExtend(Step, Start->getType()));
+ Add = getAddExpr(Start, UMul);
+ OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L);
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
+ getSignedRange(Step).getSignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
+ (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
+ AR->getPostIncExpr(*this), N)))
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L);
}
}
}
- FoldingSetNodeID ID;
- ID.AddInteger(scSignExtend);
- ID.AddPointer(Op);
- ID.AddPointer(Ty);
- void *IP = 0;
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>();
- new (S) SCEVSignExtendExpr(Op, Ty);
+ new (S) SCEVSignExtendExpr(ID, Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
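
The guard-based clauses in both extend routines reason the same way: if every
backedge is taken only while the pre-increment value compares below a bound N
picked so that one more Step cannot cross the type's extreme, the addrec can
never wrap, and the extension distributes over Start and Step. A sketch of a
loop shape this proves safe (sumWidened is a made-up name):

    #include <cassert>
    #include <cstdint>

    static int64_t sumWidened(int8_t Limit) {
      int64_t Sum = 0;
      // The backedge is guarded by X < Limit with Limit <= INT8_MAX, so X
      // never wraps; sext(X) therefore tracks the addrec {0,+,1} exactly.
      for (int8_t X = 0; X < Limit; ++X)
        Sum += (int64_t)X;
      return Sum;
    }

    int main() {
      assert(sumWidened(10) == 45); // 0 + 1 + ... + 9
      return 0;
    }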
@@ -960,8 +1056,8 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
-const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
- const Type *Ty) {
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -975,19 +1071,19 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
- const SCEV* NewOp = T->getOperand();
+ const SCEV *NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
- const SCEV* ZExt = getZeroExtendExpr(Op, Ty);
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
- const SCEV* SExt = getSignExtendExpr(Op, Ty);
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
@@ -1025,10 +1121,10 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op,
/// is also used as a check to avoid infinite recursion.
///
static bool
-CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
- SmallVector<const SCEV*, 8> &NewOps,
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
APInt &AccumulatedConstant,
- const SmallVectorImpl<const SCEV*> &Ops,
+ const SmallVectorImpl<const SCEV *> &Ops,
const APInt &Scale,
ScalarEvolution &SE) {
bool Interesting = false;
@@ -1049,9 +1145,9 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
} else {
// A multiplication of a constant with some other value. Update
// the map.
- SmallVector<const SCEV*, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
- const SCEV* Key = SE.getMulExpr(MulOps);
- std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair =
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Key, NewScale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
@@ -1069,7 +1165,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M,
AccumulatedConstant += Scale * C->getValue()->getValue();
} else {
// An ordinary operand. Update the map.
- std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair =
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert(std::make_pair(Ops[i], Scale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
@@ -1095,7 +1191,8 @@ namespace {
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
-const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW, bool HasNSW) {
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1139,13 +1236,13 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Found a match, merge the two values into a multiply, and add any
// remaining values to the result.
- const SCEV* Two = getIntegerSCEV(2, Ty);
- const SCEV* Mul = getMulExpr(Ops[i], Two);
+ const SCEV *Two = getIntegerSCEV(2, Ty);
+ const SCEV *Mul = getMulExpr(Ops[i], Two);
if (Ops.size() == 2)
return Mul;
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
Ops.push_back(Mul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, HasNUW, HasNSW);
}
// Check for truncates. If all the operands are truncated from the same
@@ -1156,7 +1253,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
const Type *DstType = Trunc->getType();
const Type *SrcType = Trunc->getOperand()->getType();
- SmallVector<const SCEV*, 8> LargeOps;
+ SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
@@ -1172,7 +1269,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// is much more likely to be foldable here.
LargeOps.push_back(getSignExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
- SmallVector<const SCEV*, 8> LargeMulOps;
+ SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
@@ -1200,7 +1297,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
}
if (Ok) {
// Evaluate the expression in the larger type.
- const SCEV* Fold = getAddExpr(LargeOps);
+ const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, DstType);
@@ -1237,16 +1334,16 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// operands multiplied by constant values.
if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
uint64_t BitWidth = getTypeSizeInBits(Ty);
- DenseMap<const SCEV*, APInt> M;
- SmallVector<const SCEV*, 8> NewOps;
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
APInt AccumulatedConstant(BitWidth, 0);
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops, APInt(BitWidth, 1), *this)) {
      // Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
- std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV*, 8>::iterator I = NewOps.begin(),
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
@@ -1276,17 +1373,17 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
- const SCEV* InnerMul = Mul->getOperand(MulOp == 0);
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul = getMulExpr(MulOps);
}
- const SCEV* One = getIntegerSCEV(1, Ty);
- const SCEV* AddOne = getAddExpr(InnerMul, One);
- const SCEV* OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+ const SCEV *One = getIntegerSCEV(1, Ty);
+ const SCEV *AddOne = getAddExpr(InnerMul, One);
+ const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -1310,22 +1407,22 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
OMulOp != e; ++OMulOp)
if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
- const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0);
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul1 = getMulExpr(MulOps);
}
- const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_end());
MulOps.erase(MulOps.begin()+OMulOp);
InnerMul2 = getMulExpr(MulOps);
}
- const SCEV* InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
- const SCEV* OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
@@ -1346,7 +1443,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- SmallVector<const SCEV*, 8> LIOps;
+ SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1360,11 +1457,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
- SmallVector<const SCEV*, 4> AddRecOps(AddRec->op_begin(),
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
- const SCEV* NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1396,7 +1493,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
}
NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
}
- const SCEV* NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
+ const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
if (Ops.size() == 2) return NewAddRec;
@@ -1420,16 +1517,19 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVAddExpr>();
- new (S) SCEVAddExpr(Ops);
+ SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
+ new (S) SCEVAddExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
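A minimal usage sketch for the new overload (assuming a ScalarEvolution &SE and two same-typed SCEVs A and B already obtained, e.g. via SE.getSCEV); the flag arguments are the HasNUW/HasNSW parameters added above:

    SmallVector<const SCEV *, 2> Ops;
    Ops.push_back(A);
    Ops.push_back(B);
    // Canonical add that is additionally marked as never wrapping unsigned.
    const SCEV *Sum = SE.getAddExpr(Ops, /*HasNUW=*/true, /*HasNSW=*/false);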
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
-const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ bool HasNUW, bool HasNSW) {
assert(!Ops.empty() && "Cannot get empty mul!");
#ifndef NDEBUG
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
@@ -1457,7 +1557,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
RHSC->getValue()->getValue());
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
@@ -1510,7 +1611,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- SmallVector<const SCEV*, 8> LIOps;
+ SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1522,7 +1623,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
- SmallVector<const SCEV*, 4> NewOps;
+ SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
if (LIOps.size() == 1) {
const SCEV *Scale = LIOps[0];
@@ -1530,13 +1631,13 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
} else {
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- SmallVector<const SCEV*, 4> MulOps(LIOps.begin(), LIOps.end());
+ SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end());
MulOps.push_back(AddRec->getOperand(i));
NewOps.push_back(getMulExpr(MulOps));
}
}
- const SCEV* NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1560,14 +1661,14 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
if (AddRec->getLoop() == OtherAddRec->getLoop()) {
// F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D}
const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV* NewStart = getMulExpr(F->getStart(),
+ const SCEV *NewStart = getMulExpr(F->getStart(),
G->getStart());
- const SCEV* B = F->getStepRecurrence(*this);
- const SCEV* D = G->getStepRecurrence(*this);
- const SCEV* NewStep = getAddExpr(getMulExpr(F, D),
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
getMulExpr(G, B),
getMulExpr(B, D));
- const SCEV* NewAddRec = getAddRecExpr(NewStart, NewStep,
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
F->getLoop());
if (Ops.size() == 2) return NewAddRec;
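The step recurrence in the comment above checks out directly: writing the two affine recurrences as $F(n) = A + nB$ and $G(n) = C + nD$,

$$F(n{+}1)\,G(n{+}1) = (F(n)+B)(G(n)+D) = F(n)G(n) + F(n)\,D + G(n)\,B + B\,D,$$

so the product starts at $A\cdot C$ and steps by $F\cdot D + G\cdot B + B\cdot D$, i.e. $\{A\,C,+,\;F D + G B + B D\}$.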
@@ -1591,14 +1692,16 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
ID.AddPointer(Ops[i]);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVMulExpr>();
- new (S) SCEVMulExpr(Ops);
+ SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
+ new (S) SCEVMulExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
-/// getUDivExpr - Get a canonical multiply expression, or something simpler if
-/// possible.
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(getEffectiveSCEVType(LHS->getType()) ==
@@ -1607,7 +1710,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->equalsInt(1))
- return LHS; // X udiv 1 --> x
+ return LHS; // X udiv 1 --> x
if (RHSC->isZero())
return getIntegerSCEV(0, LHS->getType()); // value is undefined
@@ -1622,7 +1725,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (!RHSC->getValue()->getValue().isPowerOf2())
++MaxShiftAmt;
const IntegerType *ExtTy =
- IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt);
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
@@ -1633,24 +1736,24 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop())) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop());
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV* Op = M->getOperand(i);
- const SCEV* Div = getUDivExpr(Op, RHSC);
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands();
- Operands = SmallVector<const SCEV*, 4>(MOperands.begin(),
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ Operands = SmallVector<const SCEV *, 4>(MOperands.begin(),
MOperands.end());
Operands[i] = Div;
return getMulExpr(Operands);
@@ -1659,13 +1762,13 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
- SmallVector<const SCEV*, 4> Operands;
+ SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
- const SCEV* Op = getUDivExpr(A->getOperand(i), RHS);
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i))
break;
Operands.push_back(Op);
@@ -1691,7 +1794,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>();
- new (S) SCEVUDivExpr(LHS, RHS);
+ new (S) SCEVUDivExpr(ID, LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
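A small sketch of the trivial fold at the top of this function (assuming SE and a const SCEV *X of integer type Ty are in scope); uniquing makes the pointer comparison valid:

    const SCEV *One = SE.getIntegerSCEV(1, Ty);
    const SCEV *Q = SE.getUDivExpr(X, One);
    assert(Q == X && "X udiv 1 folds back to X");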
@@ -1699,9 +1802,10 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
-const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
- const SCEV* Step, const Loop *L) {
- SmallVector<const SCEV*, 4> Operands;
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
+ const SCEV *Step, const Loop *L,
+ bool HasNUW, bool HasNSW) {
+ SmallVector<const SCEV *, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
@@ -1711,14 +1815,15 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
}
Operands.push_back(Step);
- return getAddRecExpr(Operands, L);
+ return getAddRecExpr(Operands, L, HasNUW, HasNSW);
}
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
const SCEV *
-ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
- const Loop *L) {
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L,
+ bool HasNUW, bool HasNSW) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -1729,14 +1834,14 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
if (Operands.back()->isZero()) {
Operands.pop_back();
- return getAddRecExpr(Operands, L); // {X,+,0} --> X
+ return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
}
  // Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop* NestedLoop = NestedAR->getLoop();
if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
- SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(),
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
@@ -1758,7 +1863,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
}
if (AllInvariant)
// Ok, both add recurrences are valid after the transformation.
- return getAddRecExpr(NestedOperands, NestedLoop);
+ return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW);
}
// Reset Operands to its original state.
Operands[0] = NestedAR;
@@ -1773,22 +1878,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
ID.AddPointer(L);
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
- new (S) SCEVAddRecExpr(Operands, L);
+ SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+ new (S) SCEVAddRecExpr(ID, Operands, L);
UniqueSCEVs.InsertNode(S, IP);
+ if (HasNUW) S->setHasNoUnsignedWrap(true);
+ if (HasNSW) S->setHasNoSignedWrap(true);
return S;
}
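A sketch of the extended getAddRecExpr entry points (SE, a Loop *L, and an integer Type *Ty assumed in scope): build the canonical induction variable {0,+,1}<L> and record that it never wraps signed:

    const SCEV *Start = SE.getIntegerSCEV(0, Ty);
    const SCEV *Step  = SE.getIntegerSCEV(1, Ty);
    const SCEV *IndVar =
      SE.getAddRecExpr(Start, Step, L, /*HasNUW=*/false, /*HasNSW=*/true);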
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getSMaxExpr(Ops);
}
-const SCEV*
-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1808,7 +1915,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
+ ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::smax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
@@ -1871,21 +1978,21 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>();
- new (S) SCEVSMaxExpr(Ops);
+ new (S) SCEVSMaxExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
- SmallVector<const SCEV*, 2> Ops;
+ SmallVector<const SCEV *, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getUMaxExpr(Ops);
}
-const SCEV*
-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1905,7 +2012,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
+ ConstantInt *Fold = ConstantInt::get(getContext(),
APIntOps::umax(LHSC->getValue()->getValue(),
RHSC->getValue()->getValue()));
Ops[0] = getConstant(Fold);
@@ -1968,7 +2075,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>();
- new (S) SCEVUMaxExpr(Ops);
+ new (S) SCEVUMaxExpr(ID, Ops);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -1985,7 +2092,77 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
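getUMinExpr leans on a duality rather than a dedicated node: since $\lnot x = -1 - x$ reverses the unsigned order,

$$\operatorname{umin}(a,b) \;=\; \lnot\,\operatorname{umax}(\lnot a,\ \lnot b),$$

which is exactly the getNotSCEV/getUMaxExpr composition above (and likewise for the signed min).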
-const SCEV* ScalarEvolution::getUnknown(Value *V) {
+const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy,
+ unsigned FieldNo) {
+ // If we have TargetData we can determine the constant offset.
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(getContext());
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ uint64_t Offset = SL.getElementOffset(FieldNo);
+ return getIntegerSCEV(Offset, IntPtrTy);
+ }
+
+ // Field 0 is always at offset 0.
+ if (FieldNo == 0) {
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getIntegerSCEV(0, Ty);
+ }
+
+ // Okay, it looks like we really DO need an offsetof expr. Check to see if we
+  // already have one; otherwise, create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scFieldOffset);
+ ID.AddPointer(STy);
+ ID.AddInteger(FieldNo);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = SCEVAllocator.Allocate<SCEVFieldOffsetExpr>();
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) {
+ // If we have TargetData we can determine the constant size.
+ if (TD && AllocTy->isSized()) {
+ const Type *IntPtrTy = TD->getIntPtrType(getContext());
+ return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy);
+ }
+
+ // Expand an array size into the element size times the number
+ // of elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(AllocTy)) {
+ const SCEV *E = getAllocSizeExpr(ATy->getElementType());
+ return getMulExpr(
+ E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
+ ATy->getNumElements())));
+ }
+
+ // Expand a vector size into the element size times the number
+ // of elements.
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocTy)) {
+ const SCEV *E = getAllocSizeExpr(VTy->getElementType());
+ return getMulExpr(
+ E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
+ VTy->getNumElements())));
+ }
+
+ // Okay, it looks like we really DO need a sizeof expr. Check to see if we
+  // already have one; otherwise, create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAllocSize);
+ ID.AddPointer(AllocTy);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = SCEVAllocator.Allocate<SCEVAllocSizeExpr>();
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
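A sketch of the new sizeof-style expression (SE and an LLVMContext &Ctx assumed in scope): without TargetData the size of [8 x i32] stays symbolic as 8 * sizeof(i32) via the array clause above; with TargetData it folds to a constant up front.

    const Type *ElTy = Type::getInt32Ty(Ctx);
    const ArrayType *ATy = ArrayType::get(ElTy, 8);
    const SCEV *Size = SE.getAllocSizeExpr(ATy);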
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
// Don't attempt to do anything other than create a SCEVUnknown object
// here. createSCEV only calls getUnknown after checking for all other
// interesting possibilities, and any other code that calls getUnknown
@@ -1997,7 +2174,7 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>();
- new (S) SCEVUnknown(V);
+ new (S) SCEVUnknown(ID, V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -2011,17 +2188,8 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
/// can optionally include pointer types if the ScalarEvolution class
/// has access to target-specific information.
bool ScalarEvolution::isSCEVable(const Type *Ty) const {
- // Integers are always SCEVable.
- if (Ty->isInteger())
- return true;
-
- // Pointers are SCEVable if TargetData information is available
- // to provide pointer size information.
- if (isa<PointerType>(Ty))
- return TD != NULL;
-
- // Otherwise it's not SCEVable.
- return false;
+ // Integers and pointers are always SCEVable.
+ return Ty->isInteger() || isa<PointerType>(Ty);
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2033,9 +2201,14 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
if (TD)
return TD->getTypeSizeInBits(Ty);
- // Otherwise, we support only integer types.
- assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!");
- return Ty->getPrimitiveSizeInBits();
+ // Integer types have fixed sizes.
+ if (Ty->isInteger())
+ return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is pointer. Without TargetData, conservatively
+ // assume pointers are 64-bit.
+ assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -2048,58 +2221,60 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
if (Ty->isInteger())
return Ty;
+  // The only other supported type is pointer.
assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
- return TD->getIntPtrType();
-}
+ if (TD) return TD->getIntPtrType(getContext());
-const SCEV* ScalarEvolution::getCouldNotCompute() {
- return &CouldNotCompute;
+ // Without TargetData, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
}
-/// hasSCEV - Return true if the SCEV for this value has already been
-/// computed.
-bool ScalarEvolution::hasSCEV(Value *V) const {
- return Scalars.count(V);
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
}
/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
/// expression and create a new one.
-const SCEV* ScalarEvolution::getSCEV(Value *V) {
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- std::map<SCEVCallbackVH, const SCEV*>::iterator I = Scalars.find(V);
+ std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V);
if (I != Scalars.end()) return I->second;
- const SCEV* S = createSCEV(V);
+ const SCEV *S = createSCEV(V);
Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
return S;
}
/// getIntegerSCEV - Given a SCEVable type, create a constant for the
/// specified signed integer value and return a SCEV for the constant.
-const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, Val));
}
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
- return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
const Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty)));
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
-const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
- return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
const Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- const SCEV* AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty));
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
return getMinusSCEV(AllOnes, V);
}
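Both helpers rest on two's-complement identities: $-V = (-1)\cdot V$, and, because $V + \lnot V$ is the all-ones value $-1$,

$$\lnot V = -1 - V,$$

which is the getMinusSCEV(AllOnes, V) form used above.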
@@ -2114,12 +2289,12 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2131,12 +2306,12 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2148,11 +2323,11 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended. The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
@@ -2164,11 +2339,11 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is sign
/// extended. The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
@@ -2181,11 +2356,11 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
@@ -2196,11 +2371,11 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
-const SCEV*
-ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
- (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
@@ -2214,8 +2389,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
/// with them.
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
- const SCEV* PromotedLHS = LHS;
- const SCEV* PromotedRHS = RHS;
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
@@ -2230,8 +2405,8 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
/// with them.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
- const SCEV* PromotedLHS = LHS;
- const SCEV* PromotedRHS = RHS;
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
@@ -2241,34 +2416,60 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
return getUMinExpr(PromotedLHS, PromotedRHS);
}
-/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
-/// the specified instruction and replaces any references to the symbolic value
-/// SymName with the specified value. This is used during PHI resolution.
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(UI));
+}
+
+/// ForgetSymbolicValue - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the Scalars map if they reference SymName. This is used during PHI
+/// resolution.
void
-ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I,
- const SCEV *SymName,
- const SCEV *NewVal) {
- std::map<SCEVCallbackVH, const SCEV*>::iterator SI =
- Scalars.find(SCEVCallbackVH(I, this));
- if (SI == Scalars.end()) return;
+ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(I, Worklist);
- const SCEV* NV =
- SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this);
- if (NV == SI->second) return; // No change.
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(I);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
- SI->second = NV; // Update the scalars map!
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (!It->second->hasOperand(SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+      // structure, or it's a PHI that's in the process of being computed
+      // by createNodeForPHI.  In the former case, additional loop trip
+      // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ }
+ }
- // Any instruction values that use this instruction might also need to be
- // updated!
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- ReplaceSymbolicValueWithConcrete(cast<Instruction>(*UI), SymName, NewVal);
+ PushDefUseChildren(I, Worklist);
+ }
}
/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
/// a loop header, making it a potential recurrence, or it doesn't.
///
-const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized.
if (const Loop *L = LI->getLoopFor(PN->getParent()))
if (L->getHeader() == PN->getParent()) {
@@ -2278,14 +2479,15 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
unsigned BackEdge = IncomingEdge^1;
// While we are analyzing this PHI node, handle its value symbolically.
- const SCEV* SymbolicName = getUnknown(PN);
+ const SCEV *SymbolicName = getUnknown(PN);
assert(Scalars.find(PN) == Scalars.end() &&
"PHI node already processed?");
Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
- const SCEV* BEValue = getSCEV(PN->getIncomingValue(BackEdge));
+ Value *BEValueV = PN->getIncomingValue(BackEdge);
+ const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
// has a special value for the first iteration of the loop.
@@ -2305,11 +2507,11 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
- SmallVector<const SCEV*, 8> Ops;
+ SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
- const SCEV* Accum = getAddExpr(Ops);
+ const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
@@ -2318,15 +2520,35 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
const SCEV *StartVal =
getSCEV(PN->getIncomingValue(IncomingEdge));
- const SCEV *PHISCEV =
- getAddRecExpr(StartVal, Accum, L);
+ const SCEVAddRecExpr *PHISCEV =
+ cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
+
+ // If the increment doesn't overflow, then neither the addrec nor the
+ // post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
+ if (OBO->getOperand(0) == PN &&
+ getSCEV(OBO->getOperand(1)) ==
+ PHISCEV->getStepRecurrence(*this)) {
+ const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
+ if (OBO->hasNoUnsignedWrap()) {
+ const_cast<SCEVAddRecExpr *>(PHISCEV)
+ ->setHasNoUnsignedWrap(true);
+ const_cast<SCEVAddRecExpr *>(PostInc)
+ ->setHasNoUnsignedWrap(true);
+ }
+ if (OBO->hasNoSignedWrap()) {
+ const_cast<SCEVAddRecExpr *>(PHISCEV)
+ ->setHasNoSignedWrap(true);
+ const_cast<SCEVAddRecExpr *>(PostInc)
+ ->setHasNoSignedWrap(true);
+ }
+ }
// Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and update all of the
- // entries for the scalars that use the PHI (except for the PHI
- // itself) to use the new analyzed value instead of the "symbolic"
- // value.
- ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2338,21 +2560,20 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
// Because the other in-value of i (0) fits the evolution of BEValue
// i really is an addrec evolution.
if (AddRec->getLoop() == L && AddRec->isAffine()) {
- const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+ const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
// If StartVal = j.start - j.stride, we can use StartVal as the
          // initial value of the addrec evolution.
if (StartVal == getMinusSCEV(AddRec->getOperand(0),
AddRec->getOperand(1))) {
- const SCEV* PHISCEV =
+ const SCEV *PHISCEV =
getAddRecExpr(StartVal, AddRec->getOperand(1), L);
// Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and update all of the
- // entries for the scalars that use the PHI (except for the PHI
- // itself) to use the new analyzed value instead of the "symbolic"
- // value.
- ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2361,6 +2582,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
return SymbolicName;
}
+  // It's tempting to recognize PHIs with a unique incoming value; however,
+ // this leads passes like indvars to break LCSSA form. Fortunately, such
+ // PHIs are rare, as instcombine zaps them.
+
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
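In recurrence form, the pattern this function extracts is: if the back-edge value decomposes as the PHI plus a step Accum (loop-invariant, or itself an addrec of this loop), then

$$\mathrm{PN}(0) = \mathrm{Start},\qquad \mathrm{PN}(n{+}1) = \mathrm{PN}(n) + \mathrm{Accum} \;\Longrightarrow\; \mathrm{PN} = \{\mathrm{Start},+,\mathrm{Accum}\}_{L}.$$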
@@ -2368,14 +2593,14 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
-const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
+const SCEV *ScalarEvolution::createNodeForGEP(Operator *GEP) {
- const Type *IntPtrTy = TD->getIntPtrType();
+ const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
return getUnknown(GEP);
- const SCEV* TotalOffset = getIntegerSCEV(0, IntPtrTy);
+ const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
E = GEP->op_end();
@@ -2384,22 +2609,16 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
- const StructLayout &SL = *TD->getStructLayout(STy);
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- uint64_t Offset = SL.getElementOffset(FieldNo);
TotalOffset = getAddExpr(TotalOffset,
- getIntegerSCEV(Offset, IntPtrTy));
+ getFieldOffsetExpr(STy, FieldNo));
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV* LocalOffset = getSCEV(Index);
+ const SCEV *LocalOffset = getSCEV(Index);
if (!isa<PointerType>(LocalOffset->getType()))
          // Getelementptr indices are signed.
- LocalOffset = getTruncateOrSignExtend(LocalOffset,
- IntPtrTy);
- LocalOffset =
- getMulExpr(LocalOffset,
- getIntegerSCEV(TD->getTypeAllocSize(*GTI),
- IntPtrTy));
+ LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
+ LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI));
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
}
}
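As a worked illustration (hypothetical IR, not from this patch): for a GEP computing &%p[%i].field1 over the struct type {i32, i32}, the loop above accumulates

$$\%p \;+\; \%i \cdot \mathrm{sizeof}(\{\mathtt{i32,i32}\}) \;+\; \mathrm{offsetof}(\{\mathtt{i32,i32}\},\,1),$$

where sizeof and offsetof become the new alloc-size and field-offset SCEV nodes when no TargetData is available, and plain integer constants when it is.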
@@ -2411,7 +2630,7 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) {
/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
uint32_t
-ScalarEvolution::GetMinTrailingZeros(const SCEV* S) {
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getValue()->getValue().countTrailingZeros();
@@ -2487,18 +2706,100 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV* S) {
return 0;
}
-uint32_t
-ScalarEvolution::GetMinLeadingZeros(const SCEV* S) {
- // TODO: Handle other SCEV expression types here.
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return C->getValue()->getValue().countLeadingZeros();
+ return ConstantRange(C->getValue()->getValue());
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return X;
+ }
- if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) {
- // A zero-extension cast adds zero bits.
- return GetMinLeadingZeros(C->getOperand()) +
- (getTypeSizeInBits(C->getType()) -
- getTypeSizeInBits(C->getOperand()->getType()));
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return X.udiv(Y);
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+ }
+
+ ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
+ if (!Trip) return FullSet;
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+ // Check for overflow.
+ // TODO: This is very conservative.
+ if (!(Step->isOne() &&
+ isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
+ !(Step->isAllOnesValue() &&
+ isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
+ return FullSet;
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange EndRange = getUnsignedRange(End);
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return FullSet;
+ return ConstantRange(Min, Max+1);
+ }
+ }
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2507,73 +2808,128 @@ ScalarEvolution::GetMinLeadingZeros(const SCEV* S) {
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
- return Zeros.countLeadingOnes();
+ if (Ones == ~Zeros + 1)
+ return FullSet;
+ return ConstantRange(Ones, ~Zeros + 1);
}
- return 1;
+ return FullSet;
}
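A short consumer-side sketch (SE and a const SCEV *S assumed in scope), e.g. proving that a value zero-extended from i8 stays below 256:

    ConstantRange CR = SE.getUnsignedRange(S);
    bool FitsInByte = CR.getUnsignedMax().ult(256);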
-uint32_t
-ScalarEvolution::GetMinSignBits(const SCEV* S) {
- // TODO: Handle other SCEV expression types here.
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- const APInt &A = C->getValue()->getValue();
- return A.isNegative() ? A.countLeadingOnes() :
- A.countLeadingZeros();
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return ConstantRange(C->getValue()->getValue());
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return X;
}
- if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) {
- // A sign-extension cast adds sign bits.
- return GetMinSignBits(C->getOperand()) +
- (getTypeSizeInBits(C->getType()) -
- getTypeSizeInBits(C->getOperand()->getType()));
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return X;
}
- if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
- unsigned BitWidth = getTypeSizeInBits(A->getType());
-
- // Special case decrementing a value (ADD X, -1):
- if (const SCEVConstant *CRHS = dyn_cast<SCEVConstant>(A->getOperand(0)))
- if (CRHS->isAllOnesValue()) {
- SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1, A->op_end());
- const SCEV *OtherOpsAdd = getAddExpr(OtherOps);
- unsigned LZ = GetMinLeadingZeros(OtherOpsAdd);
-
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if (LZ == BitWidth - 1)
- return BitWidth;
-
- // If we are subtracting one from a positive number, there is no carry
- // out of the result.
- if (LZ > 0)
- return GetMinSignBits(OtherOpsAdd);
- }
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return X;
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return X.udiv(Y);
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+ }
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- unsigned Min = BitWidth;
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
- unsigned N = GetMinSignBits(A->getOperand(i));
- Min = std::min(Min, N) - 1;
- if (Min == 0) return 1;
+ ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
+ if (!Trip) return FullSet;
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+ // Check for overflow.
+ // TODO: This is very conservative.
+ if (!(Step->isOne() &&
+ isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
+ !(Step->isAllOnesValue() &&
+ isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
+ return FullSet;
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange EndRange = getSignedRange(End);
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return FullSet;
+ return ConstantRange(Min, Max+1);
+ }
}
- return 1;
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- return ComputeNumSignBits(U->getValue(), TD);
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return FullSet;
+ return
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1);
}
- return 1;
+ return FullSet;
}
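The addrec clause in both range functions rests on the same observation: when the step is $+1$ and $\mathrm{Start} < \mathrm{End}$ (or $-1$ and $\mathrm{Start} > \mathrm{End}$) in the relevant ordering, the iterates never leave the interval spanned by the two endpoints, so

$$\mathrm{range}(\{\mathrm{Start},+,\mathrm{Step}\}) \;\subseteq\; [\min(\mathrm{Start},\mathrm{End}),\ \max(\mathrm{Start},\mathrm{End})+1).$$

Anything that fails the overflow check conservatively returns the full set.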
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
-const SCEV* ScalarEvolution::createSCEV(Value *V) {
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
@@ -2588,15 +2944,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getIntegerSCEV(0, V->getType());
else if (isa<UndefValue>(V))
return getIntegerSCEV(0, V->getType());
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
return getUnknown(V);
- User *U = cast<User>(V);
+ Operator *U = cast<Operator>(V);
switch (Opcode) {
case Instruction::Add:
+ // Don't transfer the NSW and NUW bits from the Add instruction to the
+ // Add expression, because the Instruction may be guarded by control
+ // flow and the no-overflow bits may not be valid for the expression in
+ // any context.
return getAddExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Mul:
+ // Don't transfer the NSW and NUW bits from the Mul instruction to the
+ // Mul expression, as with Add.
return getMulExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::UDiv:
@@ -2630,7 +2994,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
return
getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
- IntegerType::get(BitWidth - LZ)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
U->getType());
}
break;
@@ -2643,11 +3007,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
- const SCEV* LHS = getSCEV(U->getOperand(0));
+ const SCEV *LHS = getSCEV(U->getOperand(0));
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
- (CIVal.getBitWidth() - CIVal.countLeadingZeros()))
- return getAddExpr(LHS, getSCEV(U->getOperand(1)));
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ if (OldAR->hasNoUnsignedWrap())
+ const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true);
+ if (OldAR->hasNoSignedWrap())
+ const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true);
+ }
+ return S;
+ }
}
break;
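The safety condition above is just bit-disjointness: if the low $n$ bits of $x$ are known zero (GetMinTrailingZeros(LHS) covers the constant's significant bits) and $c < 2^{n}$, then no bit position is set in both operands and

$$x \mathbin{|} c = x + c,$$

so the or can be modeled as a plain add, with any addrec no-wrap flags carried over.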
case Instruction::Xor:
@@ -2673,7 +3049,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
const Type *UTy = U->getType();
- const SCEV* Z0 = Z->getOperand();
+ const SCEV *Z0 = Z->getOperand();
const Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
@@ -2699,7 +3075,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
// Turn shift left of a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- Constant *X = ConstantInt::get(
+ Constant *X = ConstantInt::get(getContext(),
APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
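This fold and the logical-shift-right fold in the next hunk are the usual power-of-two identities:

$$x \ll k = x \cdot 2^{k}, \qquad x \;\mathrm{lshr}\; k = \lfloor x / 2^{k} \rfloor,$$

with $2^{k}$ built here as APInt(BitWidth, 1).shl(k).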
@@ -2709,7 +3085,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
    // Turn logical shift right of a constant into an unsigned divide.
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- Constant *X = ConstantInt::get(
+ Constant *X = ConstantInt::get(getContext(),
APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
@@ -2729,7 +3105,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getIntegerSCEV(0, U->getType()); // value is undefined
return
getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
- IntegerType::get(Amt)),
+ IntegerType::get(getContext(), Amt)),
U->getType());
}
break;
@@ -2749,18 +3125,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
return getSCEV(U->getOperand(0));
break;
- case Instruction::IntToPtr:
- if (!TD) break; // Without TD we can't analyze pointers.
- return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
- TD->getIntPtrType());
-
- case Instruction::PtrToInt:
- if (!TD) break; // Without TD we can't analyze pointers.
- return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
- U->getType());
+  // It's tempting to handle inttoptr and ptrtoint; however, doing so can
+ // lead to pointer expressions which cannot be expanded to GEPs
+ // (because they may overflow). For now, the only pointer-typed
+ // expressions we handle are GEPs and address literals.
case Instruction::GetElementPtr:
- if (!TD) break; // Without TD we can't analyze pointers.
return createNodeForGEP(U);
case Instruction::PHI:
@@ -2842,17 +3212,29 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
-const SCEV* ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).Exact;
}
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
-const SCEV* ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).Max;
}
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert a CouldNotCompute for this loop. If the insertion
@@ -2883,10 +3265,39 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
- // conservative estimates made without the benefit
- // of trip count information.
- if (ItCount.hasAnyInfo())
- forgetLoopPHIs(L);
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in
+ // forgetLoopBackedgeTakenCount, except that it handles SCEVUnknown PHI
+ // nodes specially.
+ if (ItCount.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+        // structure, or it's a PHI that's in the process of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+        // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
}
return Pair.first->second;
}
@@ -2897,37 +3308,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
/// is deleted.
void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) {
BackedgeTakenCounts.erase(L);
- forgetLoopPHIs(L);
-}
-/// forgetLoopPHIs - Delete the memoized SCEVs associated with the
-/// PHI nodes in the given loop. This is used when the trip count of
-/// the loop may have changed.
-void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
- BasicBlock *Header = L->getHeader();
-
- // Push all Loop-header PHIs onto the Worklist stack, except those
- // that are presently represented via a SCEVUnknown. SCEVUnknown for
- // a PHI either means that it has an unrecognized structure, or it's
- // a PHI that's in the progress of being computed by createNodeForPHI.
- // In the former case, additional loop trip count information isn't
- // going to change anything. In the later case, createNodeForPHI will
- // perform the necessary updates on its own when it gets to that point.
SmallVector<Instruction *, 16> Worklist;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- std::map<SCEVCallbackVH, const SCEV*>::iterator It =
- Scalars.find((Value*)I);
- if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
- Worklist.push_back(PN);
- }
+ PushLoopPHIs(L, Worklist);
+ SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (Scalars.erase(I))
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Worklist.push_back(cast<Instruction>(UI));
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
}
}
@@ -2939,8 +3338,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
L->getExitingBlocks(ExitingBlocks);
// Examine all exits and pick the most conservative values.
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
bool CouldNotComputeBECount = false;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BackedgeTakenInfo NewBTI =
@@ -3049,8 +3448,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
BackedgeTakenInfo BTI1 =
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(TBB)) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
@@ -3084,8 +3483,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
BackedgeTakenInfo BTI1 =
ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
- const SCEV* BECount = getCouldNotCompute();
- const SCEV* MaxBECount = getCouldNotCompute();
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(FBB)) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
@@ -3143,7 +3542,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- const SCEV* ItCnt =
+ const SCEV *ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
if (!isa<SCEVCouldNotCompute>(ItCnt)) {
unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
@@ -3153,8 +3552,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
}
}
- const SCEV* LHS = getSCEV(ExitCond->getOperand(0));
- const SCEV* RHS = getSCEV(ExitCond->getOperand(1));
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
@@ -3177,20 +3576,20 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
ConstantRange CompRange(
ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
- const SCEV* Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- const SCEV* TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
if (!isa<SCEVCouldNotCompute>(TC)) return TC;
break;
}
- case ICmpInst::ICMP_EQ: {
- // Convert to: while (X-Y == 0) // while (X == Y)
- const SCEV* TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
if (!isa<SCEVCouldNotCompute>(TC)) return TC;
break;
}
@@ -3234,8 +3633,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
ScalarEvolution &SE) {
- const SCEV* InVal = SE.getConstant(C);
- const SCEV* Val = AddRec->evaluateAtIteration(InVal, SE);
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
assert(isa<SCEVConstant>(Val) &&
"Evaluation of SCEV at constant didn't fold correctly?");
return cast<SCEVConstant>(Val)->getValue();
@@ -3246,7 +3645,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
/// the addressed element of the initializer or null if the index expression is
/// invalid.
static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
+GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV,
const std::vector<ConstantInt*> &Indices) {
Constant *Init = GV->getInitializer();
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
@@ -3265,7 +3664,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
if (Idx >= ATy->getNumElements()) return 0; // Bogus program
Init = Constant::getNullValue(ATy->getElementType());
} else {
- assert(0 && "Unknown constant aggregate type!");
+ llvm_unreachable("Unknown constant aggregate type!");
}
return 0;
} else {
@@ -3293,7 +3692,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// Make sure that it is really a constant global we are gepping, with an
// initializer, and make sure the first IDX is really 0.
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
!cast<Constant>(GEP->getOperand(1))->isNullValue())
return getCouldNotCompute();
@@ -3314,7 +3713,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
- const SCEV* Idx = getSCEV(VarIdx);
+ const SCEV *Idx = getSCEV(VarIdx);
Idx = getSCEVAtScope(Idx, L);
// We can only recognize very limited forms of loop index expressions, in
@@ -3327,14 +3726,14 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
- ConstantInt *ItCst =
- ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
@@ -3418,6 +3817,7 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
if (Constant *C = dyn_cast<Constant>(V)) return C;
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
Instruction *I = cast<Instruction>(V);
+ LLVMContext &Context = I->getParent()->getContext();
std::vector<Constant*> Operands;
Operands.resize(I->getNumOperands());
@@ -3429,10 +3829,12 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ Context);
else
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ Context);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -3487,7 +3889,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
}
}
-/// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute a
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
@@ -3526,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
if (CondVal->getValue() == uint64_t(ExitWhen)) {
++NumBruteForceTripCountsComputed;
- return getConstant(Type::Int32Ty, IterationNum);
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
// Compute the value of the PHI node for the next iteration.
@@ -3540,7 +3942,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
return getCouldNotCompute();
}
-/// getSCEVAtScope - Return a SCEV expression handle for the specified value
+/// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program. The L value specifies a loop
/// nest to evaluate the expression at, where null is the top-level or a
/// specified loop is immediately inside of the loop.
@@ -3550,9 +3952,21 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
-const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
- // FIXME: this should be turned into a virtual method on SCEV!
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
+
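The memoization pattern above inserts a null placeholder before computing, so a re-entrant query for the same (V, L) pair hits the placeholder and falls back to V instead of recursing forever; the final store goes through a fresh lookup, mirroring the original code's defensive style. A generic sketch of the idiom, not LLVM code and with made-up names:

#include <map>
#include <utility>

template <typename K, typename V>
const V *memoizeWithSentinel(std::map<K, const V *> &Cache, K Key,
                             const V *(*Compute)(K), const V *Default) {
  std::pair<typename std::map<K, const V *>::iterator, bool> P =
      Cache.insert(std::make_pair(Key, static_cast<const V *>(0)));
  if (!P.second) // Placeholder or cached result already present.
    return P.first->second ? P.first->second : Default;
  const V *Result = Compute(Key); // May re-enter and hit the placeholder.
  Cache[Key] = Result;            // Store via a fresh lookup.
  return Result;
}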
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
// If this instruction is evolved from a constant-evolving PHI, compute the
@@ -3567,7 +3981,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
- const SCEV* BackedgeTakenCount = getBackedgeTakenCount(LI);
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
@@ -3585,13 +3999,6 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
- // Check to see if we've folded this instruction at this loop before.
- std::map<const Loop *, Constant *> &Values = ValuesAtScopes[I];
- std::pair<std::map<const Loop *, Constant *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<Constant *>(0)));
- if (!Pair.second)
- return Pair.first->second ? &*getSCEV(Pair.first->second) : V;
-
std::vector<Constant*> Operands;
Operands.reserve(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -3605,7 +4012,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (!isSCEVable(Op->getType()))
return V;
- const SCEV* OpV = getSCEVAtScope(getSCEV(Op), L);
+ const SCEV* OpV = getSCEVAtScope(Op, L);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
Constant *C = SC->getValue();
if (C->getType() != Op->getType())
@@ -3634,11 +4041,12 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size());
+ &Operands[0], Operands.size(),
+ getContext());
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size());
- Pair.first->second = C;
+ &Operands[0], Operands.size(),
+ getContext());
return getSCEV(C);
}
}
@@ -3651,7 +4059,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
- const SCEV* OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
@@ -3671,7 +4079,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
return getSMaxExpr(NewOps);
if (isa<SCEVUMaxExpr>(Comm))
return getUMaxExpr(NewOps);
- assert(0 && "Unknown commutative SCEV type!");
+ llvm_unreachable("Unknown commutative SCEV type!");
}
}
// If we got here, all operands are loop invariant.
@@ -3679,8 +4087,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
}
if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
- const SCEV* LHS = getSCEVAtScope(Div->getLHS(), L);
- const SCEV* RHS = getSCEVAtScope(Div->getRHS(), L);
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
if (LHS == Div->getLHS() && RHS == Div->getRHS())
return Div; // must be loop invariant
return getUDivExpr(LHS, RHS);
@@ -3692,7 +4100,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (!L || !AddRec->getLoop()->contains(L->getHeader())) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
- const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
// Then, evaluate the AddRec.
@@ -3702,33 +4110,36 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
}
if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getZeroExtendExpr(Op, Cast->getType());
}
if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getSignExtendExpr(Op, Cast->getType());
}
if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
- const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L);
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getTruncateExpr(Op, Cast->getType());
}
- assert(0 && "Unknown SCEV type!");
+ if (isa<SCEVTargetDataConstant>(V))
+ return V;
+
+ llvm_unreachable("Unknown SCEV type!");
return 0;
}
/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
-const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
@@ -3741,7 +4152,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
-static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
assert(BW == B.getBitWidth() && "Bit widths must be the same.");
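Only the signature appears in this hunk, so for reference, a sketch of the standard technique for this kind of equation (the full function body is not shown here): the problem is

$$A X \equiv B \pmod{2^{BW}}.$$

With $T$ the number of trailing zero bits of $A$: if $2^T \nmid B$ there is no solution; otherwise divide through by $2^T$, after which $A/2^T$ is odd and therefore invertible modulo $2^{BW-T}$, giving $X = (A/2^T)^{-1}\,(B/2^T)$.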
@@ -3784,7 +4195,7 @@ static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
/// might be the same) or two SCEVCouldNotCompute objects.
///
-static std::pair<const SCEV*,const SCEV*>
+static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
@@ -3833,8 +4244,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
return std::make_pair(CNC, CNC);
}
- ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA));
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
return std::make_pair(SE.getConstant(Solution1),
SE.getConstant(Solution2));
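For the record, the algebra behind these two roots: a quadratic chrec $\{L,+,M,+,N\}$ evaluates at iteration $i$ to (a standard chrec identity)

$$L + M i + N \binom{i}{2} = \tfrac{N}{2} i^2 + \left(M - \tfrac{N}{2}\right) i + L,$$

so setting $A = N/2$, $B = M - N/2$, $C = L$ and applying the quadratic formula gives

$$i = \frac{-B \pm \sqrt{B^2 - 4AC}}{2A},$$

which is the shape of Solution1 and Solution2 above, computed in bit-width-preserving APInt arithmetic.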
@@ -3843,7 +4258,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
-const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -3878,7 +4293,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// First, handle unitary steps.
if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
- return getNegativeSCEV(Start); // N = -Start (as unsigned)
+ return getNegativeSCEV(Start); // N = -Start (as unsigned)
if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
return Start; // N = Start (as unsigned)
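A worked instance of the two unitary-step cases (illustrative numbers, i8 width):

$$\{7,+,1\}:\; 7 + N \equiv 0 \pmod{2^8} \Rightarrow N = -7 \equiv 249 \text{ (unsigned)},\qquad \{7,+,-1\}:\; 7 - N \equiv 0 \Rightarrow N = 7,$$

matching getNegativeSCEV(Start) and Start respectively.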
@@ -3891,7 +4306,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
} else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) {
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
- std::pair<const SCEV*,const SCEV*> Roots = SolveQuadraticEquation(AddRec,
+ std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
*this);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
@@ -3910,7 +4325,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
- const SCEV* Val = AddRec->evaluateAtIteration(R1, *this);
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
return R1; // We found a quadratic root!
}
@@ -3923,7 +4338,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
@@ -3984,7 +4399,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
/// more general, since a front-end may have replicated the controlling
/// expression.
///
-static bool HasSameValue(const SCEV* A, const SCEV* B) {
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
@@ -3994,19 +4409,142 @@ static bool HasSameValue(const SCEV* A, const SCEV* B) {
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
- if (AI->isIdenticalTo(BI))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
return true;
// Otherwise assume they may have a different value.
return false;
}
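The new mayReadFromMemory() guard matters because isIdenticalTo compares instruction structure only, and two structurally identical loads can observe different memory. A minimal illustration, not from the patch:

// Two textually identical loads need not yield the same value:
int example(int *P) {
  int A = *P;   // first load
  *P = A + 1;   // intervening store
  int B = *P;   // second load: same form as the first, different value
  return B - A; // 1, not 0
}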
-/// isLoopGuardedByCond - Test whether entry to the loop is protected by
-/// a conditional between LHS and RHS. This is used to help avoid max
-/// expressions in loop trip counts.
-bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS) {
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ break;
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
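A small usage sketch of the range test above, driving llvm::ConstantRange directly (this assumes the headers of this era; ConstantRange takes a half-open [Lower, Upper) pair):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/ConstantRange.h"
#include <cassert>
using namespace llvm;

int main() {
  // Suppose LHS is known to lie in [0, 10) and RHS in [20, 30).
  ConstantRange LHSRange(APInt(32, 0), APInt(32, 10));
  ConstantRange RHSRange(APInt(32, 20), APInt(32, 30));
  // max(LHS) <s min(RHS), so LHS <s RHS always holds -- the same
  // comparison isKnownPredicate performs for ICMP_SLT.
  assert(LHSRange.getSignedMax().slt(RHSRange.getSignedMin()));
  return 0;
}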
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS,
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
+bool
+ScalarEvolution::isLoopGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return false;
@@ -4027,136 +4565,308 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
- if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
- LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+ if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+ LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
return true;
}
return false;
}
-/// isNecessaryCond - Test whether the given CondValue value is a condition
-/// which is at least as strict as the one described by Pred, LHS, and RHS.
-bool ScalarEvolution::isNecessaryCond(Value *CondValue,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- bool Inverse) {
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(Value *CondValue,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ bool Inverse) {
  // Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
- return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
- return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
}
}
ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
if (!ICI) return false;
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
// Now that we found a conditional branch that dominates the loop, check to
// see if it is the comparison we are looking for.
- Value *PreCondLHS = ICI->getOperand(0);
- Value *PreCondRHS = ICI->getOperand(1);
- ICmpInst::Predicate Cond;
+ ICmpInst::Predicate FoundPred;
if (Inverse)
- Cond = ICI->getInversePredicate();
+ FoundPred = ICI->getInversePredicate();
else
- Cond = ICI->getPredicate();
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
- if (Cond == Pred)
- ; // An exact match.
- else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
- ; // The actual condition is beyond sufficient.
- else
- // Check a few special cases.
- switch (Cond) {
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ // First, put a constant operand on the right.
+ if (isa<SCEVConstant>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ // Then, canonicalize comparisons with boundary cases.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMinValue()) return true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMaxValue()) return true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMinSignedValue()) return true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ }
+ if (RA.isMaxSignedValue()) return true;
+ break;
case ICmpInst::ICMP_UGT:
- if (Pred == ICmpInst::ICMP_ULT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_ULT;
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
break;
}
- return false;
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMaxValue()) return false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMinValue()) return false;
+ break;
case ICmpInst::ICMP_SGT:
- if (Pred == ICmpInst::ICMP_SLT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_SLT;
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
break;
}
- return false;
- case ICmpInst::ICMP_NE:
- // Expressions like (x >u 0) are often canonicalized to (x != 0),
- // so check for this case by checking if the NE is comparing against
- // a minimum or maximum constant.
- if (!ICmpInst::isTrueWhenEqual(Pred))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
- const APInt &A = CI->getValue();
- switch (Pred) {
- case ICmpInst::ICMP_SLT:
- if (A.isMaxSignedValue()) break;
- return false;
- case ICmpInst::ICMP_SGT:
- if (A.isMinSignedValue()) break;
- return false;
- case ICmpInst::ICMP_ULT:
- if (A.isMaxValue()) break;
- return false;
- case ICmpInst::ICMP_UGT:
- if (A.isMinValue()) break;
- return false;
- default:
- return false;
- }
- Cond = ICmpInst::ICMP_NE;
- // NE is symmetric but the original comparison may not be. Swap
- // the operands if necessary so that they match below.
- if (isa<SCEVConstant>(LHS))
- std::swap(PreCondLHS, PreCondRHS);
- break;
- }
- return false;
- default:
- // We weren't able to reconcile the condition.
- return false;
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ break;
+ }
+ if (RA.isMaxSignedValue()) return false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ break;
+ }
+ if (RA.isMinSignedValue()) return false;
+ break;
+ }
+ }
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
}
+ }
- if (!PreCondLHS->getType()->isInteger()) return false;
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
- const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
- const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
- return (HasSameValue(LHS, PreCondLHSSCEV) &&
- HasSameValue(RHS, PreCondRHSSCEV)) ||
- (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
- HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
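The second helper call leans on the identity behind getNotSCEV: $\lnot x = -1 - x$, and subtracting from a constant reverses an ordering, so

$$x < y \iff -1 - y < -1 - x \iff \lnot y < \lnot x$$

(for both signed and unsigned orderings, since no wrap occurs). Hence a known fact about FoundLHS and FoundRHS can be replayed as one about $\lnot$FoundRHS and $\lnot$FoundLHS, as the comment notes.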
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
}
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
-const SCEV* ScalarEvolution::getBECount(const SCEV* Start,
- const SCEV* End,
- const SCEV* Step) {
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
const Type *Ty = Start->getType();
- const SCEV* NegOne = getIntegerSCEV(-1, Ty);
- const SCEV* Diff = getMinusSCEV(End, Start);
- const SCEV* RoundUp = getAddExpr(Step, NegOne);
+ const SCEV *NegOne = getIntegerSCEV(-1, Ty);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
// Add an adjustment to the difference between End and Start so that
// the division will effectively round up.
- const SCEV* Add = getAddExpr(Diff, RoundUp);
-
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
- const SCEV* OperandExtendedAdd =
- getAddExpr(getZeroExtendExpr(Diff, WideTy),
- getZeroExtendExpr(RoundUp, WideTy));
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
return getUDivExpr(Add, Step);
}
@@ -4174,10 +4884,14 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() :
+ AddRec->hasNoUnsignedWrap();
+
if (AddRec->isAffine()) {
// FORNOW: We only support unit strides.
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV* Step = AddRec->getStepRecurrence(*this);
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
// TODO: handle non-constant strides.
const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -4186,7 +4900,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (CStep->isOne()) {
// With unit stride, the iteration never steps past the limit value.
} else if (CStep->getValue()->getValue().isStrictlyPositive()) {
- if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
+ if (NoWrap) {
+ // We know the iteration won't step past the maximum value for its type.
+ ;
+ } else if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
        // Test whether a positive iteration can step past the limit
// value and past the maximum value for its type in a single step.
if (isSigned) {
@@ -4213,39 +4930,37 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// treat m-n as signed nor unsigned due to overflow possibility.
// First, we get the value of the LHS in the first iteration: n
- const SCEV* Start = AddRec->getOperand(0);
+ const SCEV *Start = AddRec->getOperand(0);
// Determine the minimum constant start value.
- const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start :
- getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth));
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
// If we know that the condition is true in order to enter the loop,
// then we know that it will run exactly (m-n)/s times. Otherwise, we
// only know that it will execute (max(m,n)-n)/s times. In both cases,
// the division must round up.
- const SCEV* End = RHS;
+ const SCEV *End = RHS;
if (!isLoopGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
getMinusSCEV(Start, Step), RHS))
End = isSigned ? getSMaxExpr(RHS, Start)
: getUMaxExpr(RHS, Start);
// Determine the maximum constant end value.
- const SCEV* MaxEnd =
- isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
- .ashr(GetMinSignBits(End) - 1) :
- APInt::getMaxValue(BitWidth)
- .lshr(GetMinLeadingZeros(End)));
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
// Finally, we subtract these two values and divide, rounding up, to get
// the number of times the backedge is executed.
- const SCEV* BECount = getBECount(Start, End, Step);
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
// The maximum backedge count is similar, except using the minimum start
// value and the maximum end value.
- const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step);
+ const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);
return BackedgeTakenInfo(BECount, MaxBECount);
}
@@ -4258,7 +4973,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
/// this is that it returns the first iteration number where the value is not in
/// the condition, thus computing the exit count. If the iteration count can't
/// be computed, an instance of SCEVCouldNotCompute is returned.
-const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
ScalarEvolution &SE) const {
if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute();
@@ -4266,9 +4981,9 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
- SmallVector<const SCEV*, 4> Operands(op_begin(), op_end());
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getIntegerSCEV(0, SC->getType());
- const SCEV* Shifted = SE.getAddRecExpr(Operands, getLoop());
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
if (const SCEVAddRecExpr *ShiftedAddRec =
dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
@@ -4307,7 +5022,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// The exit value should be (End+A)/A.
APInt ExitVal = (End + A).udiv(A);
- ConstantInt *ExitValue = ConstantInt::get(ExitVal);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
// Evaluate at the exit value. If we really did fall out of the valid
// range, then we computed our trip count, otherwise wrap around or other
@@ -4319,7 +5034,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
- ConstantInt::get(ExitVal - One), SE)->getValue()) &&
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
} else if (isQuadratic()) {
@@ -4327,12 +5042,12 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// quadratic equation to solve it. To do this, we must frame our problem in
// terms of figuring out when zero is crossed, instead of when
// Range.getUpper() is crossed.
- SmallVector<const SCEV*, 4> NewOps(op_begin(), op_end());
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV* NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
// Next, solve the constructed addrec
- std::pair<const SCEV*,const SCEV*> Roots =
+ std::pair<const SCEV *,const SCEV *> Roots =
SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
@@ -4340,7 +5055,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
- R1->getValue(), R2->getValue()))) {
+ R1->getValue(), R2->getValue()))) {
if (CB->getZExtValue() == false)
std::swap(R1, R2); // R1 is the minimum root now.
@@ -4352,7 +5067,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
SE);
if (Range.contains(R1Val->getValue())) {
// The next iteration must be out of the range...
- ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1);
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (!Range.contains(R1Val->getValue()))
@@ -4362,7 +5078,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If R1 was not in the range, then it is a good return value. Make
// sure that R1-1 WAS in the range though, just in case.
- ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1);
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (Range.contains(R1Val->getValue()))
return R1;
@@ -4381,22 +5098,21 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
//===----------------------------------------------------------------------===//
void ScalarEvolution::SCEVCallbackVH::deleted() {
- assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!");
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(getValPtr()))
- SE->ValuesAtScopes.erase(I);
SE->Scalars.erase(getValPtr());
// this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
- assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!");
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
Value *Old = getValPtr();
bool DeleteOld = false;
for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
@@ -4410,20 +5126,19 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
DeleteOld = true;
continue;
}
+ if (!Visited.insert(U))
+ continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(U))
- SE->ValuesAtScopes.erase(I);
- if (SE->Scalars.erase(U))
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI)
- Worklist.push_back(*UI);
+ SE->Scalars.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
}
+ // Delete the Old value if it (indirectly) references itself.
if (DeleteOld) {
if (PHINode *PN = dyn_cast<PHINode>(Old))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- if (Instruction *I = dyn_cast<Instruction>(Old))
- SE->ValuesAtScopes.erase(I);
SE->Scalars.erase(Old);
// this now dangles!
}
@@ -4502,21 +5217,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
- // observable from outside the class though (the hasSCEV function
- // notwithstanding), so casting away the const isn't dangerous.
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution*>(this);
OS << "Classifying expressions for: " << F->getName() << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isSCEVable(I->getType())) {
- OS << *I;
+ OS << *I << '\n';
OS << " --> ";
- const SCEV* SV = SE.getSCEV(&*I);
+ const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
const Loop *L = LI->getLoopFor((*I).getParent());
- const SCEV* AtUse = SE.getSCEVAtScope(SV, L);
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
@@ -4524,7 +5239,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
if (L) {
OS << "\t\t" "Exits: ";
- const SCEV* ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!ExitValue->isLoopInvariant(L)) {
OS << "<<Unknown>>";
} else {
@@ -4540,7 +5255,3 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
PrintLoopInfo(OS, &SE, *I);
}
-void ScalarEvolution::print(std::ostream &o, const Module *M) const {
- raw_os_ostream OS(o);
- print(OS, M);
-}
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 0000000..cc79e6c
--- /dev/null
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,133 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis in terms of ScalarEvolution queries.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class VISIBILITY_HIDDEN ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {}
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ Value *GetUnderlyingIdentifiedObject(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+static RegisterPass<ScalarEvolutionAliasAnalysis>
+X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetUnderlyingIdentifiedObject - Given an expression, try to find an
+/// "identified object" (see AliasAnalysis::isIdentifiedObject) base
+/// value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetUnderlyingIdentifiedObject(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetUnderlyingIdentifiedObject(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (isa<PointerType>(Last->getType()))
+ return GetUnderlyingIdentifiedObject(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // Determine if we've found an identified object.
+ Value *V = U->getValue();
+ if (isIdentifiedObject(V))
+ return V;
+ }
+  // No identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
+ const Value *B, unsigned BSize) {
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt AI(BitWidth, ASize);
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+ if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) {
+ APInt BI(BitWidth, BSize);
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+ if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin()))
+ return NoAlias;
+ }
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetUnderlyingIdentifiedObject(AS);
+ Value *BO = GetUnderlyingIdentifiedObject(BS);
+ if ((AO && AO != A) || (BO && BO != B))
+ if (alias(AO ? AO : A, AO ? ~0u : ASize,
+ BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(A, ASize, B, BSize);
+}
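
The NoAlias check above reduces to plain interval arithmetic on the unsigned ranges of the two pointer differences. A minimal standalone model (a sketch, not part of the patch; uminBA and uminAB stand in for getUnsignedRange(B-A).getUnsignedMin() and the symmetric query):

    #include <cstdint>

    // Model of scev-aa's distance test: the access [A, A+ASize) cannot
    // reach B if B-A, as a wrapping unsigned value, is at least ASize,
    // and symmetrically for [B, B+BSize). Both directions must hold,
    // because a wrapped difference alone says nothing about ordering.
    bool provablyNoAlias(uint64_t uminBA, uint64_t uminAB,
                         uint64_t ASize, uint64_t BSize) {
      return ASize <= uminBA && BSize <= uminAB;
    }

Since both passes register command-line names, the analysis can be exercised with something like opt -scev-aa -aa-eval on a test file.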
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 729a0c3..d674ee8 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -52,10 +53,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
return CE->getOperand(0);
}
- // FIXME: keep track of the cast instruction.
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getCast(Op, C, Ty);
-
+
if (Argument *A = dyn_cast<Argument>(V)) {
// Check to see if there is already a cast!
for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
@@ -155,55 +155,95 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV* &S,
- const SCEV* &Remainder,
- const APInt &Factor,
- ScalarEvolution &SE) {
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const TargetData *TD) {
// Everything is divisible by one.
- if (Factor == 1)
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getIntegerSCEV(1, S->getType());
return true;
+ }
// For a Constant, check for a multiple of the given factor.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- ConstantInt *CI =
- ConstantInt::get(C->getValue()->getValue().sdiv(Factor));
- // If the quotient is zero and the remainder is non-zero, reject
- // the value at this scale. It will be considered for subsequent
- // smaller scales.
- if (C->isZero() || !CI->isZero()) {
- const SCEV* Div = SE.getConstant(CI);
- S = Div;
- Remainder =
- SE.getAddExpr(Remainder,
- SE.getConstant(C->getValue()->getValue().srem(Factor)));
+ // 0/x == 0.
+ if (C->isZero())
return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
}
}
// In a Mul, check if there is a constant operand which is a multiple
// of the given factor.
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(Factor)) {
- const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
- SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
- MOperands.end());
- NewMulOps[0] =
- SE.getConstant(C->getValue()->getValue().sdiv(Factor));
- S = SE.getMulExpr(NewMulOps);
- return true;
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With TargetData, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
+ MOperands.end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without TargetData, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType());
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands();
+ SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(),
+ MOperands.end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
}
+ }
+ }
// In an AddRec, check if both start and step are divisible.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEV* Step = A->getStepRecurrence(SE);
- const SCEV* StepRem = SE.getIntegerSCEV(0, Step->getType());
- if (!FactorOutConstant(Step, StepRem, Factor, SE))
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType());
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
return false;
if (!StepRem->isZero())
return false;
- const SCEV* Start = A->getStart();
- if (!FactorOutConstant(Start, Remainder, Factor, SE))
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
return false;
S = SE.getAddRecExpr(Start, Step, A->getLoop());
return true;
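
In the constant/constant case above, the factoring rule is ordinary signed division with the non-divisible part pushed into Remainder for retry at a smaller scale. A minimal integer model (a sketch; plain long stands in for APInt):

    // Model of FactorOutConstant's constant case: rewrite S as
    // Factor*Q + R. Zero factors trivially; a zero quotient with a
    // nonzero S is rejected so the value can be reconsidered at a
    // subsequent smaller scale, matching the comment in the patch.
    bool factorOutConstant(long &S, long &Remainder, long Factor) {
      if (Factor == 1) return true;  // everything is divisible by one
      if (S == 0) return true;       // 0/x == 0
      long Q = S / Factor;           // signed divide, like APInt::sdiv
      if (Q == 0) return false;      // reject at this scale
      Remainder += S % Factor;      // srem part stays behind
      S = Q;
      return true;
    }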
@@ -212,15 +252,81 @@ static bool FactorOutConstant(const SCEV* &S,
return false;
}
-/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
-/// instead of using ptrtoint+arithmetic+inttoptr. This helps
-/// BasicAliasAnalysis analyze the result. However, it suffers from the
-/// underlying bug described in PR2831. Addition in LLVM currently always
-/// has two's complement wrapping guaranteed. However, the semantics for
-/// getelementptr overflow are ambiguous. In the common case though, this
-/// expansion gets used when a GEP in the original code has been converted
-/// into integer arithmetic, in which case the resulting code will be no
-/// more undefined than it was originally.
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getIntegerSCEV(0, Ty) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops = Add->getOperands();
+ else {
+ Ops.clear();
+ if (!Sum->isZero())
+ Ops.push_back(Sum);
+ }
+ // Then append the addrecs.
+ Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getIntegerSCEV(0, Ty);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop()));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
@@ -237,92 +343,130 @@ static bool FactorOutConstant(const SCEV* &S,
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
-Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
- const SCEV* const *op_end,
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
const PointerType *PTy,
const Type *Ty,
Value *V) {
const Type *ElTy = PTy->getElementType();
SmallVector<Value *, 4> GepIndices;
- SmallVector<const SCEV*, 8> Ops(op_begin, op_end);
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
  // Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
// the indices index into the element or field type selected by the
// preceding index.
for (;;) {
- APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
- ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- SmallVector<const SCEV*, 8> NewOps;
- SmallVector<const SCEV*, 8> ScaledOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- // Split AddRecs up into parts as either of the parts may be usable
- // without the other.
- if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i]))
- if (!A->getStart()->isZero()) {
- const SCEV* Start = A->getStart();
- Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()),
- A->getStepRecurrence(SE),
- A->getLoop()));
- Ops[i] = Start;
- ++e;
- }
- // If the scale size is not 0, attempt to factor out a scale.
- if (ElSize != 0) {
- const SCEV* Op = Ops[i];
- const SCEV* Remainder = SE.getIntegerSCEV(0, Op->getType());
- if (FactorOutConstant(Op, Remainder, ElSize, SE)) {
- ScaledOps.push_back(Op); // Op now has ElSize factored out.
- NewOps.push_back(Remainder);
- continue;
+ const SCEV *ElSize = SE.getAllocSizeExpr(ElTy);
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized() && !ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getIntegerSCEV(0, Ty);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
}
}
- // If the operand was not divisible, add it to the list of operands
- // we'll scan next iteration.
- NewOps.push_back(Ops[i]);
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
}
- Ops = NewOps;
- AnyNonZeroIndices |= !ScaledOps.empty();
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
Value *Scaled = ScaledOps.empty() ?
Constant::getNullValue(Ty) :
expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
GepIndices.push_back(Scaled);
// Collect struct field index operands.
- if (!Ops.empty())
- while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With TargetData, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
if (SE.getTypeSizeInBits(C->getType()) <= 64) {
const StructLayout &SL = *SE.TD->getStructLayout(STy);
uint64_t FullOffset = C->getValue()->getZExtValue();
if (FullOffset < SL.getSizeInBytes()) {
unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
ElTy = STy->getTypeAtIndex(ElIdx);
Ops[0] =
SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
AnyNonZeroIndices = true;
- continue;
+ FoundFieldNo = true;
}
}
- break;
+ } else {
+ // Without TargetData, just check for a SCEVFieldOffsetExpr of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVFieldOffsetExpr *FO =
+ dyn_cast<SCEVFieldOffsetExpr>(Ops[i]))
+ if (FO->getStructType() == STy) {
+ unsigned FieldNo = FO->getFieldNo();
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ FieldNo));
+ ElTy = STy->getTypeAtIndex(FieldNo);
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
}
+ }
- if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
ElTy = ATy->getElementType();
- continue;
- }
- break;
+ else
+ break;
}
  // If none of the operands were convertible to proper GEP indices, cast
// the base to i8* and do an ugly getelementptr with that. It's still
// better than ptrtoint+arithmetic+inttoptr at least.
if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
V = InsertNoopCastOfTo(V,
- Type::Int8Ty->getPointerTo(PTy->getAddressSpace()));
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ // Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
// Fold a GEP with constant operands.
@@ -345,12 +489,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
}
}
- Value *GEP = Builder.CreateGEP(V, Idx, "scevgep");
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
InsertedValues.insert(GEP);
return GEP;
}
- // Insert a pretty getelementptr.
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
Value *GEP = Builder.CreateGEP(V,
GepIndices.begin(),
GepIndices.end(),
@@ -361,21 +508,37 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
}
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ int NumOperands = S->getNumOperands();
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expand(S->getOperand(S->getNumOperands()-1));
+
+ // Find the index of an operand to start with. Choose the operand with
+ // pointer type, if there is one, or the last operand otherwise.
+ int PIdx = 0;
+ for (; PIdx != NumOperands - 1; ++PIdx)
+ if (isa<PointerType>(S->getOperand(PIdx)->getType())) break;
+
+ // Expand code for the operand that we chose.
+ Value *V = expand(S->getOperand(PIdx));
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
- if (SE.TD)
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- const SmallVectorImpl<const SCEV*> &Ops = S->getOperands();
- return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V);
- }
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
+ // Take the operand at PIdx out of the list.
+ const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
+ SmallVector<const SCEV *, 8> NewOps;
+ NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx);
+ NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end());
+ // Make a GEP.
+ return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V);
+ }
+ // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer
+ // arithmetic.
V = InsertNoopCastOfTo(V, Ty);
// Emit a bunch of add instructions
- for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ for (int i = NumOperands-1; i >= 0; --i) {
+ if (i == PIdx) continue;
Value *W = expandCodeFor(S->getOperand(i), Ty);
V = InsertBinop(Instruction::Add, V, W);
}
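
The starting-operand choice in visitAddExpr is easy to miss: scan for a pointer-typed operand, and fall back to the last operand when none exists. A toy model of just that scan (a sketch; real operands are SCEVs rather than flags):

    #include <vector>

    // Mirror of the PIdx loop above: return the index of the first
    // pointer-typed operand, or the last index if there is none. Note
    // the loop bound of N-1: if no pointer is found, PIdx ends at the
    // last operand, reproducing the old "expand the last operand" case.
    int pickStartIndex(const std::vector<bool> &isPointer) {
      int N = static_cast<int>(isPointer.size());
      int PIdx = 0;
      for (; PIdx != N - 1; ++PIdx)
        if (isPointer[PIdx]) break;
      return PIdx;
    }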
@@ -422,7 +585,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
-static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest,
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
ScalarEvolution &SE) {
while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
Base = A->getStart();
@@ -433,7 +596,7 @@ static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV*, 8> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -457,11 +620,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (CanonicalIV &&
SE.getTypeSizeInBits(CanonicalIV->getType()) >
SE.getTypeSizeInBits(Ty)) {
- const SCEV *Start = SE.getAnyExtendExpr(S->getStart(),
- CanonicalIV->getType());
- const SCEV *Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
- CanonicalIV->getType());
- Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
+ const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
+ SmallVector<const SCEV *, 4> NewOps(Ops.size());
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop()));
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
@@ -475,28 +638,26 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- const SmallVectorImpl<const SCEV*> &SOperands = S->getOperands();
- SmallVector<const SCEV*, 4> NewOps(SOperands.begin(), SOperands.end());
+ const SmallVectorImpl<const SCEV *> &SOperands = S->getOperands();
+ SmallVector<const SCEV *, 4> NewOps(SOperands.begin(), SOperands.end());
NewOps[0] = SE.getIntegerSCEV(0, Ty);
- const SCEV* Rest = SE.getAddRecExpr(NewOps, L);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L);
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
- if (SE.TD) {
- const SCEV* Base = S->getStart();
- const SCEV* RestArray[1] = { Rest };
- // Dig into the expression to find the pointer base for a GEP.
- ExposePointerBase(Base, RestArray[0], SE);
- // If we found a pointer, expand the AddRec with a GEP.
- if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
- // Make sure the Base isn't something exotic, such as a multiplied
- // or divided pointer value. In those cases, the result type isn't
- // actually a pointer type.
- if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
- Value *StartV = expand(Base);
- assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
- }
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
}
}
@@ -519,29 +680,22 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// Create and insert the PHI node for the induction variable in the
// specified loop.
BasicBlock *Header = L->getHeader();
- BasicBlock *Preheader = L->getLoopPreheader();
PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
InsertedValues.insert(PN);
- PN->addIncoming(Constant::getNullValue(Ty), Preheader);
- pred_iterator HPI = pred_begin(Header);
- assert(HPI != pred_end(Header) && "Loop with zero preds???");
- if (!L->contains(*HPI)) ++HPI;
- assert(HPI != pred_end(Header) && L->contains(*HPI) &&
- "No backedge in loop?");
-
- // Insert a unit add instruction right before the terminator corresponding
- // to the back-edge.
Constant *One = ConstantInt::get(Ty, 1);
- Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
- (*HPI)->getTerminator());
- InsertedValues.insert(Add);
-
- pred_iterator PI = pred_begin(Header);
- if (*PI == Preheader)
- ++PI;
- PN->addIncoming(Add, *PI);
- return PN;
+ for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+ HPI != HPE; ++HPI)
+ if (L->contains(*HPI)) {
+ // Insert a unit add instruction right before the terminator corresponding
+ // to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
+ (*HPI)->getTerminator());
+ InsertedValues.insert(Add);
+ PN->addIncoming(Add, *HPI);
+ } else {
+ PN->addIncoming(Constant::getNullValue(Ty), *HPI);
+ }
}
// {0,+,F} --> {0,+,1} * F
@@ -563,19 +717,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// folders, then expandCodeFor the closed form. This allows the folders to
// simplify the expression without having to build a bunch of special code
// into this folder.
- const SCEV* IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
+ const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
// Promote S up to the canonical IV type, if the cast is foldable.
- const SCEV* NewS = S;
- const SCEV* Ext = SE.getNoopOrAnyExtend(S, I->getType());
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType());
if (isa<SCEVAddRecExpr>(Ext))
NewS = Ext;
- const SCEV* V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
//cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
// Truncate the result down to the original type, if needed.
- const SCEV* T = SE.getTruncateOrNoop(V, Ty);
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
return expand(T);
}
@@ -607,9 +761,15 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getOperand(0), Ty);
- for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
InsertedValues.insert(ICmp);
@@ -617,13 +777,23 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
InsertedValues.insert(Sel);
LHS = Sel;
}
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
return LHS;
}
Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getOperand(0), Ty);
- for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
InsertedValues.insert(ICmp);
@@ -631,10 +801,22 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
InsertedValues.insert(Sel);
LHS = Sel;
}
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
return LHS;
}
-Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) {
+Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) {
+ return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo());
+}
+
+Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) {
+ return ConstantExpr::getSizeOf(S->getAllocType());
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
// Expand the code for this SCEV.
Value *V = expand(SH);
if (Ty) {
@@ -695,7 +877,7 @@ Value *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isInteger() && "Can only insert integer induction variables!");
- const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
+ const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
SE.getIntegerSCEV(1, Ty), L);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
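
For context, clients typically reach this file through expandCodeFor. A hypothetical sketch of such a call (SE, Ptr, and InsertPt are placeholders supplied by the surrounding pass; the three-argument overload is assumed from its use elsewhere in the tree):

    #include "llvm/Analysis/ScalarEvolutionExpander.h"
    using namespace llvm;

    // Materialize the SCEV of a pointer value at a given insertion
    // point. With the changes above, pointer-typed sums expand to a
    // "pretty" GEP when the operands fold into indices, or to an i8*
    // "uglygep" otherwise, with or without TargetData.
    Value *materialize(ScalarEvolution &SE, Value *Ptr,
                       Instruction *InsertPt) {
      SCEVExpander Expander(SE);
      const SCEV *S = SE.getSCEV(Ptr);
      return Expander.expandCodeFor(S, Ptr->getType(), InsertPt);
    }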
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 5433068..b7844f0 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -17,7 +17,9 @@
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -27,7 +29,7 @@ using namespace llvm;
AbstractLatticeFunction::~AbstractLatticeFunction() {}
/// PrintValue - Render the specified lattice value to the specified stream.
-void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) {
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
if (V == UndefVal)
OS << "undefined";
else if (V == OverdefinedVal)
@@ -87,7 +89,7 @@ void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
- DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
BBExecutable.insert(BB); // Basic block is executable!
BBWorkList.push_back(BB); // Add the block to the work list!
}
@@ -98,8 +100,8 @@ void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
return; // This edge is already known to be executable!
- DOUT << "Marking Edge Executable: " << Source->getNameStart()
- << " -> " << Dest->getNameStart() << "\n";
+ DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
if (BBExecutable.count(Dest)) {
// The destination is already executable, but we just made an edge
@@ -153,7 +155,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
// Constant condition variables mean the branch can only go a single way
- Succs[C == ConstantInt::getFalse()] = true;
+ Succs[C == ConstantInt::getFalse(*Context)] = true;
return;
}
@@ -221,6 +223,16 @@ void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
}
void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
LatticeVal PNIV = getOrInitValueState(&PN);
LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
@@ -283,7 +295,7 @@ void SparseSolver::Solve(Function &F) {
Instruction *I = InstWorkList.back();
InstWorkList.pop_back();
- DOUT << "\nPopped off I-WL: " << *I;
+ DEBUG(errs() << "\nPopped off I-WL: " << *I << "\n");
// "I" got into the work list because it made a transition. See if any
// users are both live and in need of updating.
@@ -300,7 +312,7 @@ void SparseSolver::Solve(Function &F) {
BasicBlock *BB = BBWorkList.back();
BBWorkList.pop_back();
- DOUT << "\nPopped off BBWL: " << *BB;
+ DEBUG(errs() << "\nPopped off BBWL: " << *BB);
// Notify all instructions in this basic block that they are newly
// executable.
@@ -310,7 +322,7 @@ void SparseSolver::Solve(Function &F) {
}
}
-void SparseSolver::Print(Function &F, std::ostream &OS) const {
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
OS << "\nFUNCTION: " << F.getNameStr() << "\n";
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
@@ -322,7 +334,7 @@ void SparseSolver::Print(Function &F, std::ostream &OS) const {
OS << "; anon bb\n";
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
LatticeFunc->PrintValue(getLatticeState(I), OS);
- OS << *I;
+ OS << *I << "\n";
}
OS << "\n";
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 8f19fda..c9b303b 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -18,7 +18,7 @@
#include "llvm/Analysis/Trace.h"
#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
Function *Trace::getFunction() const {
@@ -31,9 +31,9 @@ Module *Trace::getModule() const {
/// print - Write trace to output stream.
///
-void Trace::print(std::ostream &O) const {
- Function *F = getFunction ();
- O << "; Trace from function " << F->getName() << ", blocks:\n";
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
for (const_iterator i = begin(), e = end(); i != e; ++i) {
O << "; ";
WriteAsOperand(O, *i, true, getModule());
@@ -46,5 +46,5 @@ void Trace::print(std::ostream &O) const {
/// output stream.
///
void Trace::dump() const {
- print(cerr);
+ print(errs());
}
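
Trace.cpp follows the same stream migration applied throughout this patch: printers take raw_ostream natively, and any remaining std::ostream caller adapts through raw_os_ostream (the same adapter the deleted ScalarEvolution::print overload used). A minimal sketch of the pattern, assuming raw_os_ostream is available in this tree:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/raw_os_ostream.h"
    #include <ostream>

    // New-style printer: raw_ostream is the native interface.
    void printBanner(llvm::raw_ostream &OS) {
      OS << "; banner\n";
    }

    // Legacy adapter: wrap a std::ostream for old call sites.
    void printBanner(std::ostream &O) {
      llvm::raw_os_ostream OS(O);
      printBanner(OS);
    }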
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 07a18fe..baa347a 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -16,25 +16,16 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
using namespace llvm;
-/// getOpcode - If this is an Instruction or a ConstantExpr, return the
-/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getOpcode();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode();
- // Use UserOp1 to mean there's no opcode.
- return Instruction::UserOp1;
-}
-
-
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
@@ -45,9 +36,15 @@ static unsigned getOpcode(const Value *V) {
/// optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt &KnownZero, APInt &KnownOne,
- TargetData *TD, unsigned Depth) {
+ const TargetData *TD, unsigned Depth) {
const unsigned MaxDepth = 6;
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
@@ -91,8 +88,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
- Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ const Type *ObjectType = GV->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GV->isDeclaration() && !GV->mayBeOverridden())
+ Align = TD->getPrefTypeAlignment(ObjectType);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
if (Align > 0)
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
CountTrailingZeros_32(Align));
@@ -101,17 +106,28 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownOne.clear();
return;
}
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clear(); KnownOne.clear();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ }
+ return;
+ }
KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything.
if (Depth == MaxDepth || Mask == 0)
return; // Limit search depth.
- User *I = dyn_cast<User>(V);
+ Operator *I = dyn_cast<Operator>(V);
if (!I) return;
APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
- switch (getOpcode(I)) {
+ switch (I->getOpcode()) {
default: break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
@@ -228,12 +244,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- const Type *SrcTy = I->getOperand(0)->getType();
- unsigned SrcBitWidth = TD ?
- TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getScalarSizeInBits();
+ if (isa<PointerType>(SrcTy))
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
+ else
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+
APInt MaskIn(Mask);
MaskIn.zextOrTrunc(SrcBitWidth);
KnownZero.zextOrTrunc(SrcBitWidth);
@@ -261,8 +281,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- unsigned SrcBitWidth = SrcTy->getBitWidth();
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
APInt MaskIn(Mask);
MaskIn.trunc(SrcBitWidth);
@@ -382,7 +401,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Determine which operand has more trailing zeros, and use that
// many bits from the other operand.
if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (getOpcode(I) == Instruction::Add) {
+ if (I->getOpcode() == Instruction::Add) {
APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
KnownZero |= KnownZero2 & Mask;
KnownOne |= KnownOne2 & Mask;
@@ -462,10 +481,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
Align =
std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
+ (unsigned)TD->getABITypeAlignment(
+ Type::getDoubleTy(V->getContext())));
Align =
std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
+ (unsigned)TD->getABITypeAlignment(
+ Type::getInt64Ty(V->getContext())));
}
}
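
As a usage illustration for these known-bits queries, the hypothetical helper below (not from the patch) turns trailing known-zero bits into a provable power-of-two alignment. Per the documentation added above, pointer-typed values require a non-null TargetData:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Target/TargetData.h"
    #include <algorithm>
    using namespace llvm;

    // Hypothetical helper: the largest power of two V is provably a
    // multiple of, derived from the low bits ComputeMaskedBits proves
    // zero. The shift is clamped to keep the result in unsigned range.
    unsigned knownAlignment(Value *V, const TargetData *TD) {
      unsigned BitWidth = TD->getTypeSizeInBits(V->getType());
      APInt Mask = APInt::getAllOnesValue(BitWidth);
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
      return 1u << std::min(KnownZero.countTrailingOnes(), 29u);
    }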
@@ -522,10 +543,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
for (unsigned i = 0; i != 2; ++i) {
Value *L = P->getIncomingValue(i);
Value *R = P->getIncomingValue(!i);
- User *LU = dyn_cast<User>(L);
+ Operator *LU = dyn_cast<Operator>(L);
if (!LU)
continue;
- unsigned Opcode = getOpcode(LU);
+ unsigned Opcode = LU->getOpcode();
// Check for operations that have the property that if
// both their operands have low zero bits, the result
// will have low zero bits.
@@ -608,8 +629,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
- TargetData *TD, unsigned Depth) {
+ const TargetData *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
@@ -626,7 +653,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
///
/// 'Op' must have a scalar integer type.
///
-unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+ unsigned Depth) {
assert((TD || V->getType()->isIntOrIntVector()) &&
"ComputeNumSignBits requires a TargetData object to operate "
"on non-integer values!");
@@ -642,8 +670,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
if (Depth == 6)
return 1; // Limit search depth.
- User *U = dyn_cast<User>(V);
- switch (getOpcode(V)) {
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
@@ -789,7 +817,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (Depth == 6)
return 1; // Limit search depth.
- const Instruction *I = dyn_cast<Instruction>(V);
+ const Operator *I = dyn_cast<Operator>(V);
if (I == 0) return false;
// (add x, 0.0) is guaranteed to return +0.0, not -0.0.
@@ -810,15 +838,15 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction()) {
if (F->isDeclaration()) {
- switch (F->getNameLen()) {
- case 3: // abs(x) != -0.0
- if (!strcmp(F->getNameStart(), "abs")) return true;
- break;
- case 4: // abs[lf](x) != -0.0
- if (!strcmp(F->getNameStart(), "absf")) return true;
- if (!strcmp(F->getNameStart(), "absl")) return true;
- break;
- }
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
}
}
@@ -831,10 +859,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far, new insertvalue instructions
// build on that.
-Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
- SmallVector<unsigned, 10> &Idxs,
- unsigned IdxSkip,
- Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ LLVMContext &Context,
+ Instruction *InsertBefore) {
const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
@@ -845,7 +874,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
Idxs.push_back(i);
Value *PrevTo = To;
To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
- InsertBefore);
+ Context, InsertBefore);
Idxs.pop_back();
if (!To) {
// Couldn't find any inserted value for this index? Cleanup
@@ -868,7 +897,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// we might be able to find the complete struct somewhere.
// Find the value that is at that particular spot
- Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+ Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context);
if (!V)
return NULL;
@@ -890,8 +919,9 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
-Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
- const unsigned *idx_end, Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+ const unsigned *idx_end, LLVMContext &Context,
+ Instruction *InsertBefore) {
assert(InsertBefore && "Must have someplace to insert!");
const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
idx_begin,
@@ -900,7 +930,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
unsigned IdxSkip = Idxs.size();
- return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip,
+ Context, InsertBefore);
}
/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
@@ -910,7 +941,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
- const unsigned *idx_end, Instruction *InsertBefore) {
+ const unsigned *idx_end, LLVMContext &Context,
+ Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
// recursion)
if (idx_begin == idx_end)
@@ -921,20 +953,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
&& "Invalid indices for type?");
const CompositeType *PTy = cast<CompositeType>(V->getType());
-
+
if (isa<UndefValue>(V))
return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
idx_begin,
idx_end));
else if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
- idx_begin,
- idx_end));
+ idx_begin,
+ idx_end));
else if (Constant *C = dyn_cast<Constant>(V)) {
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
// Recursively process this constant
- return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, idx_end,
- InsertBefore);
+ return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+ idx_end, Context, InsertBefore);
} else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
@@ -953,7 +985,8 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// %C = insertvalue {i32, i32 } %A, i32 11, 1
// which allows the unused 0,0 element from the nested struct to be
// removed.
- return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+ return BuildSubAggregate(V, idx_begin, req_idx,
+ Context, InsertBefore);
else
// We can't handle this without inserting insertvalues
return 0;
@@ -964,13 +997,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// looking for, then.
if (*req_idx != *i)
return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
- InsertBefore);
+ Context, InsertBefore);
}
// If we end up here, the indices of the insertvalue match with those
// requested (though possibly only partially). Now we recursively look at
// the inserted value, passing any remaining indices.
return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
- InsertBefore);
+ Context, InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
  // If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
@@ -994,7 +1027,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
&& "Number of indices added not correct?");
return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
- InsertBefore);
+ Context, InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
@@ -1035,7 +1068,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Make sure the index-ee is a pointer to array of i8.
const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || AT->getElementType() != Type::Int8Ty)
+ if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -1056,11 +1089,16 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
StopAtNul);
}
+ if (MDString *MDStr = dyn_cast<MDString>(V)) {
+ Str = MDStr->getString();
+ return true;
+ }
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
- if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
Constant *GlobalInit = GV->getInitializer();
@@ -1074,7 +1112,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty)
+ if (Array == 0 ||
+ Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
return false;
// Get the number of elements in the array