Diffstat (limited to 'lib/Transforms')
20 files changed, 560 insertions, 370 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4635d0e..1793bbf 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2493,29 +2493,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { Changed = true; } - // If the aliasee has internal linkage, give it the name and linkage - // of the alias, and delete the alias. This turns: - // define internal ... @f(...) - // @a = alias ... @f - // into: - // define ... @a(...) - if (!Target->hasLocalLinkage()) - continue; - - // The transform is only useful if the alias does not have internal linkage. - if (J->hasLocalLinkage()) - continue; + // If the alias is externally visible, we may still be able to simplify it. + if (!J->hasLocalLinkage()) { + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; - // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section - // and other attributes of the aliasee with those of the alias. - if (!hasOneUse) - continue; + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; - // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); - Target->setLinkage(J->getLinkage()); - Target->GlobalValue::copyAttributesFrom(J); + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + } // Delete the alias. 
M.getAliasList().erase(J); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 2951dbc..829da6b 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -15,6 +15,7 @@ #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#include "llvm/BasicBlock.h" #include "llvm/ADT/EquivalenceClasses.h" #include <vector> #include <algorithm> @@ -33,6 +34,18 @@ namespace llvm { typename MaximumSpanningTree<CT>::EdgeWeight Y) const { if (X.second > Y.second) return true; if (X.second < Y.second) return false; + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } return false; } }; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9ca90c3..e4c4ae5 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -21,7 +21,6 @@ #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" @@ -563,7 +562,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions have often have +/// OptimizeMemoryInst - Load and Store Instructions often have /// addressing modes that can do significant amounts of computation. As such, /// instruction selection will try to get the load or store to do as much /// computation as possible for the program. 
The problem is that isel can only diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index b0988b5..1cfde8f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -85,9 +85,14 @@ static bool doesClobberMemory(Instruction *I) { return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: - case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true; + default: + return false; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::init_trampoline: + case Intrinsic::lifetime_end: + return true; } } return false; @@ -111,14 +116,13 @@ static Value *getPointerOperand(Instruction *I) { return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getOperand(1); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getOperand(1); - case Intrinsic::lifetime_end: - return II->getOperand(2); + + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return I->getOperand(1); + case Intrinsic::lifetime_end: + return I->getOperand(2); } } @@ -135,15 +139,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { Len = MI->getLength(); } else { - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return -1u; - case Intrinsic::lifetime_end: - Len = II->getOperand(1); - break; + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return -1u; + case Intrinsic::lifetime_end: + Len = I->getOperand(1); + break; } } if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 6f1c32c..222792b 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,15 +31,18 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -187,8 +190,11 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; + +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// @@ -488,21 +494,21 @@ uint32_t 
ValueTable::lookup_or_add_call(CallInst* C) { // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + const NonLocalDepEntry *I = &deps[i]; // Ignore non-local dependencies. - if (I->second.isNonLocal()) + if (I->getResult().isNonLocal()) continue; // We don't handle non-depedencies. If we already have a call, reject // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { + if (I->getResult().isClobber() || cdep != 0) { cdep = 0; break; } - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ cdep = NonLocalDepCall; continue; } @@ -987,27 +993,27 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// AnalyzeLoadFromClobberingStore - This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. This returns -1 if we have to -/// give up, or a byte number in the stored value of the piece that feeds the -/// load. -static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, +/// AnalyzeLoadFromClobberingWrite - This function is called when we have a +/// memdep query of a load that ends up being a clobbering memory write (store, +/// memset, memcpy, memmove). This means that the write *may* provide bits used +/// by the load but we can't be sure because the pointers don't mustalias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. 
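The doc comment above describes the core of the forwarding check: decompose both pointers into a common base plus constant byte offsets, and give up unless the write fully covers the load. A minimal standalone sketch of that arithmetic (illustrative helper, not the LLVM implementation; sizes and offsets in bytes):

#include <cstdint>

// Returns -1 if the stored bytes cannot supply the loaded bytes, otherwise
// the byte offset of the load within the stored value (LoadOffset-StoreOffset).
int64_t ComputeForwardingOffset(int64_t StoreOffset, uint64_t StoreSizeBytes,
                                int64_t LoadOffset, uint64_t LoadSizeBytes) {
  // The load must start at or after the start of the store...
  if (LoadOffset < StoreOffset)
    return -1;
  // ...and end at or before the end of the store, or some bits are missing.
  if (uint64_t(LoadOffset - StoreOffset) + LoadSizeBytes > StoreSizeBytes)
    return -1;
  return LoadOffset - StoreOffset;
}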
- if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) || - isa<StructType>(DepSI->getOperand(0)->getType()) || - isa<ArrayType>(DepSI->getOperand(0)->getType())) + if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy)) return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = - GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); Value *LoadBase = - GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; @@ -1018,12 +1024,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1033,12 +1037,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, // must have gotten confused. // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then // remove this check, as it is duplicated with what we have below. - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); - uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); - if ((StoreSize & 7) | (LoadSize & 7)) + if ((WriteSizeInBits & 7) | (LoadSize & 7)) return -1; - StoreSize >>= 3; // Convert to bytes. + uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. LoadSize >>= 3; @@ -1052,12 +1055,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE LOAD DEP WITH COMMON BASE:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1075,6 +1076,66 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, return LoadOffset-StoreOffset; } +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, + const TargetData &TD) { + // Cannot handle reading from store of first-class aggregate yet. 
+ if (isa<StructType>(DepSI->getOperand(0)->getType()) || + isa<ArrayType>(DepSI->getOperand(0)->getType())) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + StorePtr, StoreSize, TD); +} + +static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, + const TargetData &TD) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (SizeCst == 0) return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (MI->getIntrinsicID() == Intrinsic::memset) + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, TD); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast<MemTransferInst>(MI); + + Constant *Src = dyn_cast<Constant>(MTI->getSource()); + if (Src == 0) return -1; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject()); + if (GV == 0 || !GV->isConstant()) return -1; + + // See if the access is within the bounds of the transfer. + int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + MI->getDest(), MemSizeInBits, TD); + if (Offset == -1) + return Offset; + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + if (ConstantFoldLoadFromConstPtr(Src, &TD)) + return Offset; + return -1; +} + /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means @@ -1089,50 +1150,134 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (isa<PointerType>(SrcVal->getType())) - SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); if (!isa<IntegerType>(SrcVal->getType())) - SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp"); // Shift the bits to the least significant depending on endianness. 
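The shift amount computed just below selects which bytes of the stored integer the load actually reads. A standalone sketch of the same logic, assuming the stored value has already been widened to an integer and everything fits in 8 bytes (helper name is illustrative):

#include <cstdint>

// Extract LoadSize bytes at byte Offset from a StoreSize-byte stored value.
uint64_t ExtractStoredBytes(uint64_t StoredVal, unsigned StoreSize,
                            unsigned LoadSize, unsigned Offset,
                            bool LittleEndian) {
  // Little-endian: byte Offset is bit Offset*8 from the LSB. Big-endian:
  // the same byte sits (StoreSize - LoadSize - Offset)*8 bits up.
  unsigned ShiftAmt = LittleEndian ? Offset * 8
                                   : (StoreSize - LoadSize - Offset) * 8;
  uint64_t V = StoredVal >> ShiftAmt;
  if (LoadSize < 8)                     // Truncate, as the IR trunc does.
    V &= (uint64_t(1) << (LoadSize * 8)) - 1;
  return V;
}

For example, with a 4-byte store of 0x11223344 on a little-endian target, loading 1 byte at offset 1 shifts right by 8 and masks, yielding 0x33, which matches the byte layout in memory.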
unsigned ShiftAmt; - if (TD.isLittleEndian()) { + if (TD.isLittleEndian()) ShiftAmt = Offset*8; - } else { + else ShiftAmt = (StoreSize-LoadSize-Offset)*8; - } if (ShiftAmt) - SrcVal = BinaryOperator::CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); if (LoadSize != StoreSize) - SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp"); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetMemInstValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + const TargetData &TD){ + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); + + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { + // If we can double the number of bytes set, do it. + if (NumBytesSet*2 <= LoadSize) { + Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = Builder.CreateShl(Val, 1*8); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + return ConstantFoldLoadFromConstPtr(Src, &TD); +} + + + struct AvailableValueInBlock { /// BB - The basic block in question. BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + MemIntrin // A memory intrinsic which is loaded from. + }; + /// V - The value that is live out of the block. - Value *V; - /// Offset - The byte offset in V that is interesting for the load query. + PointerIntPair<Value *, 1, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
unsigned Offset; static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset = 0) { AvailableValueInBlock Res; Res.BB = BB; - Res.V = V; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); Res.Offset = Offset; return Res; } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(!isSimpleValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } }; /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, @@ -1149,30 +1294,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, const Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - BasicBlock *BB = ValuesPerBlock[i].BB; - Value *AvailableVal = ValuesPerBlock[i].V; - unsigned Offset = ValuesPerBlock[i].Offset; + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + BasicBlock *BB = AV.BB; if (SSAUpdate.HasValueForBlock(BB)) continue; - - if (AvailableVal->getType() != LoadTy) { - assert(TD && "Need target data to handle type mismatch case"); - AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, - BB->getTerminator(), *TD); - - if (Offset) { - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + + unsigned Offset = AV.Offset; + + Value *AvailableVal; + if (AV.isSimpleValue()) { + AvailableVal = AV.getSimpleValue(); + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *AV.getSimpleValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - - - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + } else { + AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *AV.getMemIntrinValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - SSAUpdate.AddAvailableValue(BB, AvailableVal); } @@ -1187,12 +1335,18 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +static bool isLifetimeStart(Instruction *Inst) { + if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; + return false; +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. - SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; + SmallVector<NonLocalDepEntry, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " @@ -1206,11 +1360,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. 
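The splat loop in GetMemInstValueForLoad above widens the single memset byte to the load's width by repeated doubling. A standalone sketch of that loop, assuming a load of at most 8 bytes (function name is illustrative):

#include <cstdint>

uint64_t SplatByte(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte, OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
    if (NumBytesSet * 2 <= LoadSize) {  // Double the bytes already set.
      Val |= Val << (NumBytesSet * 8);
      NumBytesSet <<= 1;
      continue;
    }
    Val = OneElt | (Val << 8);          // Otherwise add one byte at a time.
    ++NumBytesSet;
  }
  return Val;  // e.g. SplatByte(0xAB, 4) == 0xABABABAB
}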
- if (Deps.size() == 1 && Deps[0].second.isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( errs() << "GVN: non-local load "; WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; + errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } @@ -1225,18 +1379,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI, const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { - BasicBlock *DepBB = Deps[i].first; - MemDepResult DepInfo = Deps[i].second; + BasicBlock *DepBB = Deps[i].getBB(); + MemDepResult DepInfo = Deps[i].getResult(); if (DepInfo.isClobber()) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + Value *Address = Deps[i].getAddress(); + // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { if (TD == 0) TD = getAnalysisIfAvailable<TargetData>(); - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, + DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepSI->getOperand(0), @@ -1245,8 +1405,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // If the clobbering value is a memset/memcpy/memmove, see if we can + // forward a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, + DepMI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, + Offset)); + continue; + } + } + } - // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } @@ -1254,21 +1429,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || + // Loading immediately after lifetime begin -> undef. + isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, UndefValue::get(LI->getType()))); continue; } - // Loading immediately after lifetime begin or end -> undef. - if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - UndefValue::get(LI->getType()))); - } - } - if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1378,19 +1546,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. 
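The AvailableValueInBlock struct above stores its SimpleVal/MemIntrin discriminator in the low bit of the value pointer via PointerIntPair. A standalone sketch of that low-bit tagging trick, assuming pointers aligned to at least 2 bytes (class name is illustrative, not the LLVM ADT):

#include <cassert>
#include <cstdint>

class TaggedPtr {
  uintptr_t Bits;
public:
  TaggedPtr(void *P, bool Tag) {
    // Alignment guarantees the low bit of the pointer is free for the tag.
    assert((reinterpret_cast<uintptr_t>(P) & 1) == 0 && "pointer unaligned");
    Bits = reinterpret_cast<uintptr_t>(P) | uintptr_t(Tag);
  }
  void *getPointer() const {
    return reinterpret_cast<void *>(Bits & ~uintptr_t(1));
  }
  bool getInt() const { return (Bits & 1) != 0; }
};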
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].V == LI) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) return false; + // FIXME: It is extremely unclear what this loop is doing, other than + // artificially restricting loadpre. if (isSinglePred) { bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V)) + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + if (AV.isSimpleValue()) // "Hot" Instruction is in some loop (because it dominates its dep. // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1435,29 +1609,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. // - // FIXME: This may insert a computation, but we don't tell scalar GVN - // optimization stuff about it. How do we do this? SmallVector<Instruction*, 8> NewInsts; - Value *LoadPtr = 0; // If all preds have a single successor, then we know it is safe to insert the // load on the pred (?!?), so we can insert code to materialize the pointer if // it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; if (allSingleSucc) { - LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT,NewInsts); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); } else { - LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT); - } + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; + } + // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (LoadPtr == 0) { + assert(NewInsts.empty() && "Shouldn't insert insts on failure"); DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getOperand(0) << "\n"); return false; } + + // Assign value numbers to these new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } // Make sure it is valid to move this load here. We have to watch out for: // @1 = getelementptr (i8* p, ... @@ -1517,11 +1705,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If the value isn't available, don't do anything! if (Dep.isClobber()) { - // FIXME: We should handle memset/memcpy/memmove as dependent instructions - // to forward the value if available. 
- //if (isa<MemIntrinsic>(Dep.getInst())) - //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; - // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1532,25 +1715,42 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. + Value *AvailVal = 0; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { - int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); - if (Offset != -1) { - Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, - L->getType(), L, *TD); - DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' - << *AvailVal << '\n' << *L << "\n\n\n"); - - // Replace the load! - L->replaceAllUsesWith(AvailVal); - if (isa<PointerType>(AvailVal->getType())) - MD->invalidateCachedPointerInfo(AvailVal); - toErase.push_back(L); - NumGVNLoad++; - return true; - } + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); } + // If the clobbering value is a memset/memcpy/memmove, see if we can forward + // a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { + if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); + } + } + + if (AvailVal) { + DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa<PointerType>(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow errs() << "GVN: load "; @@ -1635,11 +1835,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { return true; } - // If this load occurs either right after a lifetime begin or a lifetime end, + // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index d12ad81..b9c536f 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -8585,25 +8585,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); - if (BitWidth > 1) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); - - if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 && - KnownZeroRHS.countLeadingOnes() == BitWidth-1) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { if (!DoXform) return ICI; - Value *Xor = Builder->CreateXor(LHS, RHS); + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1)); - Xor->takeName(ICI); - return ReplaceInstUsesWith(CI, Xor); + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); } } } @@ -11189,8 +11200,9 @@ namespace llvm { return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width; } - static bool isPod() { return true; } }; + template <> + struct isPodLike<LoweredPHIRecord> { static const bool value = true; }; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1b93f34..d58b9c9 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -718,6 +718,11 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredSI->getSuccessor(PredCase) != DestBB && DestSI->getSuccessor(i) != DestBB) continue; + + // Do not forward this if it already goes to this destination, this would + // be an infinite loop. + if (PredSI->getSuccessor(PredCase) == DestSucc) + continue; // Otherwise, we're safe to make the change. 
Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 5511387..42a8fdc 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -160,16 +160,17 @@ namespace { // Because the exit block is not in the loop, we know we have to get _at // least_ its immediate dominator. - do { - // Get next Immediate Dominator. - IDom = IDom->getIDom(); - + IDom = IDom->getIDom(); + + while (IDom && IDom != BlockInLoopNode) { // If we have got to the header of the loop, then the instructions block // did not dominate the exit node, so we can't hoist it. if (IDom->getBlock() == LoopHeader) return false; - } while (IDom != BlockInLoopNode); + // Get next Immediate Dominator. + IDom = IDom->getIDom(); + }; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 564c7ac..85cc712 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,18 +24,14 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -85,8 +81,6 @@ namespace { class LoopStrengthReduce : public LoopPass { IVUsers *IU; - LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -94,10 +88,6 @@ namespace { /// particular stride. std::map<const SCEV *, IVsOfOneStride> IVsByStride; - /// StrideNoReuse - Keep track of all the strides whose ivs cannot be - /// reused (nor should they be rewritten to reuse other strides). - SmallSet<const SCEV *, 4> StrideNoReuse; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; @@ -109,8 +99,7 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) { - } + LoopPass(&ID), TLI(tli) {} bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -118,13 +107,11 @@ namespace { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. 
AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominanceFrontier>(); - AU.addPreserved<DominatorTree>(); + AU.addPreserved("loops"); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); AU.addRequired<IVUsers>(); @@ -228,19 +215,17 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); - DeadInsts.pop_back(); + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } - } I->eraseFromParent(); Changed = true; @@ -265,7 +250,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -338,9 +323,6 @@ namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { - /// SE - The current ScalarEvolution object. - ScalarEvolution *SE; - /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -372,9 +354,9 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(se->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -383,14 +365,14 @@ namespace { void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI); + Instruction *IP, + ScalarEvolution *SE); void dump() const; }; } @@ -404,27 +386,12 @@ void BasedUser::dump() const { Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI) { - // Figure out where we *really* want to insert this code. In particular, if - // the user is inside of a loop that is nested inside of L, we really don't - // want to insert this expression before the user, we'd rather pull it out as - // many loops as possible. - Instruction *BaseInsertPt = IP; - - // Figure out the most-nested loop that IP is in. 
- Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - - // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out - // the preheader of the outer-most loop where NewBase is not loop invariant. - if (L->contains(IP->getParent())) - while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { - BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); - InsertLoop = InsertLoop->getParentLoop(); - } - - Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + Instruction *IP, + ScalarEvolution *SE) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -443,8 +410,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE) { if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -473,7 +440,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -535,7 +502,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -1011,17 +978,13 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { - if (StrideNoReuse.count(Stride)) - return SE->getIntegerSCEV(0, Stride->getType()); - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || - StrideNoReuse.count(SI->first)) + if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = @@ -1780,8 +1743,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, *LI, - DeadInsts); + Rewriter, L, this, + DeadInsts, SE); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. 
@@ -2745,8 +2708,6 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); - LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; @@ -2792,15 +2753,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - } - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); - StrideNoReuse.clear(); + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + } // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 38d267a..b7adfdc 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -404,12 +404,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); - Function *F = loopHeader->getParent(); // If LoopSimplify was unable to form a preheader, don't do any unswitching. if (!loopPreheader) return false; + Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for // this case. if (!IsTrivialUnswitchCondition(LoopCond)) { diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index 001267a..dbc82e1 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -19,7 +19,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" @@ -155,8 +154,10 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ae6ad74..b040a27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -105,7 +105,7 @@ namespace { void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info); void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); + AllocaInfo &Info); void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, unsigned OpNo, AllocaInfo &Info); void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, @@ -362,7 +362,6 @@ void SROA::DoScalarReplacement(AllocaInst *AI, // Now that we have created the alloca instructions that we want to use, // expand the getelementptr instructions to use them. 
- // while (!AI->use_empty()) { Instruction *User = cast<Instruction>(AI->use_back()); if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { @@ -450,11 +449,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, NumReplaced++; } - /// isSafeElementUse - Check to see if this use is an allowed use for a /// getelementptr instruction of an array aggregate allocation. isFirstElt /// indicates whether Ptr is known to the start of the aggregate. -/// void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -503,7 +500,6 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, } } - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); if (Info.isUnsafe) return; break; @@ -543,9 +539,8 @@ static bool AllUsersAreLoads(Value *Ptr) { return true; } -/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an +/// isSafeUseOfAllocation - Check if this user is an allowed use for an /// aggregate allocation. -/// void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info) { if (BitCastInst *C = dyn_cast<BitCastInst>(User)) @@ -614,7 +609,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. + // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { @@ -628,7 +623,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory /// intrinsic can be promoted by SROA. At this point, we know that the operand /// of the memintrinsic is a pointer to the beginning of the allocation. void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, @@ -656,8 +651,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, } } -/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast -/// are +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); @@ -773,6 +768,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, OtherPtr = MTI->getRawDest(); } } + + // Keep track of the other intrinsic argument, so it can be removed if it + // is dead when the intrinsic is replaced. + Value *PossiblyDead = OtherPtr; // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. 
@@ -926,9 +925,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } } MI->eraseFromParent(); + if (PossiblyDead) + RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); } -/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that /// overwrites the entire allocation. Extract out the pieces of the stored /// integer and store them individually. void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -1052,7 +1053,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SI->eraseFromParent(); } -/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to /// an integer. Load the individual pieces to form the aggregate value. void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { @@ -1186,7 +1187,6 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, /// or 1 if safe after canonicalization has been performed. -/// int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. @@ -1215,7 +1215,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return Info.needsCleanup ? 1 : 3; } -/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP +/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP /// is canonicalized here. void SROA::CleanupGEP(GetElementPtrInst *GEPI) { gep_type_iterator I = gep_type_begin(GEPI); @@ -1347,7 +1347,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to use a to single vector type, return true, and set VecTy to +/// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable /// integer, return true but set VecTy to VoidTy. Further, if the use is not a /// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset @@ -1355,7 +1355,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, /// /// If we see at least one access to the value that is as a vector type, set the /// SawVec flag. -/// bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, bool &SawVec, uint64_t Offset, unsigned AllocaSize) { @@ -1438,7 +1437,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return true; } - /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca /// directly. This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a @@ -1481,7 +1479,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); // FIXME: Remove once builder has Twine API. 
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); @@ -1506,7 +1505,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { APVal |= APVal << 8; // FIXME: Remove once builder has Twine API. - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); @@ -1679,7 +1679,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return FromVal; } - /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer /// or vector value "Old" at the offset specified by Offset. /// diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 13077fe..5acd6aa 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -88,7 +88,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, if (!isa<ReturnInst>(Body->getTerminator())) break; - Instruction *NextInst = next(BasicBlock::iterator(Call)); + Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); // Inline the call, taking care of what code ends up where. NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index f9b929c..6fd884b 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -128,8 +128,7 @@ public: /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return - B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); + return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -157,27 +156,25 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B) { Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); - return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, + const Type *Ty = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + return B.CreateCall4(MemCpy, Dst, Src, Len, ConstantInt::get(Type::getInt32Ty(*Context), Align)); } -/// EmitMemMOve - Emit a call to the memmove function to the builder. This +/// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. 
 Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
                                         unsigned Align, IRBuilder<> &B) {
   Module *M = Caller->getParent();
-  Intrinsic::ID IID = Intrinsic::memmove;
-  const Type *Tys[1];
-  Tys[0] = TD->getIntPtrType(*Context);
-  Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
-  Value *D = CastToCStr(Dst, B);
-  Value *S = CastToCStr(Src, B);
+  const Type *Ty = TD->getIntPtrType(*Context);
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
   Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
-  return B.CreateCall4(MemMove, D, S, Len, A);
+  return B.CreateCall4(MemMove, Dst, Src, Len, A);
 }
 
 /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
@@ -2647,10 +2644,11 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
 //   * strcspn("",a) -> 0
 //   * strcspn(s,"") -> strlen(a)
 //
-// strstr:
+// strstr: (PR5783)
 //   * strstr(x,x) -> x
-//   * strstr(s1,s2) -> offset_of_s2_in(s1)
-//       (if s1 and s2 are constant strings)
+//   * strstr(x, "") -> x
+//   * strstr(x, "a") -> strchr(x, 'a')
+//   * strstr(s1,s2) -> result (if s1 and s2 are constant strings)
 //
 // tan, tanf, tanl:
 //   * tan(atan(x)) -> x
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2974592..2962e84 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index aef0f5f..7a37aa3 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -603,3 +603,65 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
   return true;
 }
 
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+  bool Changed = false;
+
+  // This implementation doesn't currently consider undef operands
+  // specially. Theoretically, two phis which are identical except for
+  // one having an undef where the other doesn't could be collapsed.
+
+  // Map from PHI hash values to PHI nodes. If multiple PHIs have
+  // the same hash value, the element is the first PHI in the
+  // linked list in CollisionMap.
+  DenseMap<uintptr_t, PHINode *> HashMap;
+
+  // Maintain linked lists of PHI nodes with common hash values.
+  DenseMap<PHINode *, PHINode *> CollisionMap;
+
+  // Examine each PHI.
+  for (BasicBlock::iterator I = BB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I++); ) {
+    // Compute a hash value on the operands. Instcombine will likely have sorted
+    // them, which helps expose duplicates, but we have to check all the
+    // operands to be safe in case instcombine hasn't run.
+    uintptr_t Hash = 0;
+    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+      // This hash algorithm is quite weak as hash functions go, but it seems
+      // to do a good enough job for this particular purpose, and is very quick.
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
+    // If we've never seen this hash value before, it's a unique PHI.
+    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+      HashMap.insert(std::make_pair(Hash, PN));
+    if (Pair.second) continue;
+    // Otherwise it's either a duplicate or a hash collision.
+    for (PHINode *OtherPN = Pair.first->second; ; ) {
+      if (OtherPN->isIdenticalTo(PN)) {
+        // A duplicate. Replace this PHI with its duplicate.
+        PN->replaceAllUsesWith(OtherPN);
+        PN->eraseFromParent();
+        Changed = true;
+        break;
+      }
+      // A non-duplicate hash collision.
+      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+      if (I == CollisionMap.end()) {
+        // Set this PHI to be the head of the linked list of colliding PHIs.
+        PHINode *Old = Pair.first->second;
+        Pair.first->second = PN;
+        CollisionMap[PN] = Old;
+        break;
+      }
+      // Proceed to the next PHI in the list.
+      OtherPN = I->second;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 8c18b59..743bb6e 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -244,7 +244,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
 
   // Merge case into clusters
   if (Cases.size()>=2)
-    for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) {
+    for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
       int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
       int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
       BasicBlock* nextBB = J->BB;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e25f9e2..846e432 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -55,7 +55,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
   static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
 }
 
@@ -102,7 +101,7 @@ namespace {
 public:
   typedef std::vector<Value *> ValVector;
 
-  RenamePassData() {}
+  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
   RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
     : BB(B), Pred(P), Values(V) {}
   BasicBlock *BB;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8a07c35..ba41bf9 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -295,10 +295,14 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       InsertedVal = SingularValue;
     }
 
+    // Either path through the 'if' should have set InsertedVal -> SingularVal.
+    assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) &&
+           "RAUW didn't change InsertedVal to be SingularVal");
+
     // Drop the entries we added in IncomingPredInfo to restore the stack.
     IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                            IncomingPredInfo.end());
-    return InsertedVal;
+    return SingularValue;
   }
 
   // Otherwise, we do need a PHI: insert one now if we don't already have one.
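
The duplicate-PHI elimination added to Local.cpp above keys its DenseMap on a rotate-and-XOR hash of the PHI's operand pointers. The following is a minimal standalone sketch of that mixing step lifted out of the LLVM types; the name hashPointerOperands and the plain void* operand list are illustrative assumptions, not part of this patch:

#include <limits.h>
#include <stdint.h>
#include <vector>

// XOR each operand pointer into the accumulator, then rotate left by
// 7 bits so that the same operands in a different order usually hash
// differently, mirroring the loop in EliminateDuplicatePHINodes.
static uintptr_t hashPointerOperands(const std::vector<const void*> &Ops) {
  uintptr_t Hash = 0;
  for (size_t i = 0, e = Ops.size(); i != e; ++i) {
    Hash ^= reinterpret_cast<uintptr_t>(Ops[i]);
    Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
  }
  return Hash;
}

Identical PHIs necessarily land in the same bucket, so the comparatively expensive isIdenticalTo check only runs on hash-equal candidates, and the CollisionMap linked list separates true duplicates from accidental collisions.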
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 89b0bd9..d7ca45e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1589,69 +1589,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   return true;
 }
 
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
-///
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
-  bool Changed = false;
-
-  // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
-  // one having an undef where the other doesn't could be collapsed.
-
-  // Map from PHI hash values to PHI nodes. If multiple PHIs have
-  // the same hash value, the element is the first PHI in the
-  // linked list in CollisionMap.
-  DenseMap<uintptr_t, PHINode *> HashMap;
-
-  // Maintain linked lists of PHI nodes with common hash values.
-  DenseMap<PHINode *, PHINode *> CollisionMap;
-
-  // Examine each PHI.
-  for (BasicBlock::iterator I = BB->begin();
-       PHINode *PN = dyn_cast<PHINode>(I++); ) {
-    // Compute a hash value on the operands. Instcombine will likely have sorted
-    // them, which helps expose duplicates, but we have to check all the
-    // operands to be safe in case instcombine hasn't run.
-    uintptr_t Hash = 0;
-    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
-      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
-      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
-    }
-    // If we've never seen this hash value before, it's a unique PHI.
-    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
-      HashMap.insert(std::make_pair(Hash, PN));
-    if (Pair.second) continue;
-    // Otherwise it's either a duplicate or a hash collision.
-    for (PHINode *OtherPN = Pair.first->second; ; ) {
-      if (OtherPN->isIdenticalTo(PN)) {
-        // A duplicate. Replace this PHI with its duplicate.
-        PN->replaceAllUsesWith(OtherPN);
-        PN->eraseFromParent();
-        Changed = true;
-        break;
-      }
-      // A non-duplicate hash collision.
-      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
-      if (I == CollisionMap.end()) {
-        // Set this PHI to be the head of the linked list of colliding PHIs.
-        PHINode *Old = Pair.first->second;
-        Pair.first->second = PN;
-        CollisionMap[PN] = Old;
-        break;
-      }
-      // Procede to the next PHI in the list.
-      OtherPN = I->second;
-    }
-  }
-
-  return Changed;
-}
-
 /// SimplifyCFG - This function is used to do simplification of a CFG. For
 /// example, it adjusts branches to branches to eliminate the extra hop, it
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
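
With EliminateDuplicatePHINodes relocated from SimplifyCFG.cpp to Utils/Local.cpp, the helper becomes usable outside the CFG simplifier. A minimal sketch of such a caller, assuming the declaration is exported through llvm/Transforms/Utils/Local.h; the driver function dedupPHIsInFunction is hypothetical and not part of this patch:

#include "llvm/Function.h"
#include "llvm/Transforms/Utils/Local.h"

// Run the relocated helper over every block in a function, reporting
// whether any duplicate PHI node was folded away.
static bool dedupPHIsInFunction(llvm::Function &F) {
  bool Changed = false;
  for (llvm::Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    Changed |= llvm::EliminateDuplicatePHINodes(&*BB);
  return Changed;
}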