author    | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
commit    | 1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree      | 9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/Transforms
parent    | 377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
download  | FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.zip, FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.tar.gz
Update LLVM to r104832.
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/IPO/DeadArgumentElimination.cpp   |  18
-rw-r--r-- | lib/Transforms/IPO/InlineAlways.cpp              |   3
-rw-r--r-- | lib/Transforms/IPO/InlineSimple.cpp              |   3
-rw-r--r-- | lib/Transforms/IPO/MergeFunctions.cpp            | 464
-rw-r--r-- | lib/Transforms/IPO/StripSymbols.cpp              |   6
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h         |   4
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp  |  72
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h |   2
-rw-r--r-- | lib/Transforms/Scalar/CMakeLists.txt             |   1
-rw-r--r-- | lib/Transforms/Scalar/GVN.cpp                    |   8
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp     | 490
-rw-r--r-- | lib/Transforms/Scalar/SimplifyCFGPass.cpp        |  14
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp       |  11
-rw-r--r-- | lib/Transforms/Scalar/Sink.cpp                   | 267
-rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp           |   2
-rw-r--r-- | lib/Transforms/Utils/PromoteMemoryToRegister.cpp |   4
-rw-r--r-- | lib/Transforms/Utils/SSAUpdater.cpp              | 531
17 files changed, 1141 insertions, 759 deletions
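
The summary above covers the range between the parent and commit hashes listed in the header. A minimal sketch of reproducing it locally, assuming a git clone of the FreeBSD-src repository that contains both revisions (the hashes are taken from the header; the clone path is hypothetical):

```sh
# Diffstat limited to lib/Transforms, equivalent to the table above.
git diff --stat 377552607e51dc1d3e6ff33833f9620bcfe815ac \
                1e3dec662ea18131c495db50caccc57f77b7a5fe -- lib/Transforms

# Full patch for the same path; the hunks below show this output.
git diff 377552607e51dc1d3e6ff33833f9620bcfe815ac \
         1e3dec662ea18131c495db50caccc57f77b7a5fe -- lib/Transforms
```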
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 6443dd4..692e47d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -535,14 +535,14 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); - // Mark the function as live. - LiveFunctions.insert(&F); - // Mark all arguments as live. - for (unsigned i = 0, e = F.arg_size(); i != e; ++i) - PropagateLiveness(CreateArg(&F, i)); - // Mark all return values as live. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - PropagateLiveness(CreateRet(&F, i)); + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); } /// MarkLive - Mark the given return value or argument as live. Additionally, @@ -859,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NFTy->getReturnType()->isVoidTy()) { RetVal = 0; } else { assert (RetTy->isStructTy()); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index bc8028c..8e312e7 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -54,6 +54,9 @@ namespace { return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 46cf4b2..74b4a1c 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -49,6 +49,9 @@ namespace { CA.growCachedCostInfo(Caller, Callee); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b07e22c..622a9b5 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -17,32 +17,55 @@ // important that the hash function be high quality. The equality comparison // iterates through each instruction in each basic block. // -// When a match is found, the functions are folded. We can only fold two -// functions when we know that the definition of one of them is not -// overridable. +// When a match is found the functions are folded. If both functions are +// overridable, we move the functionality into a new internal function and +// leave two overridable thunks to it. // //===----------------------------------------------------------------------===// // // Future work: // -// * fold vector<T*>::push_back and vector<S*>::push_back. -// -// These two functions have different types, but in a way that doesn't matter -// to us. As long as we never see an S or T itself, using S* and S** is the -// same as using a T* and T**. -// // * virtual functions. // // Many functions have their address taken by the virtual function table for // the object they belong to. 
However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such implementations. // +// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// +// The current implementation loops over a pair-wise comparison of all +// functions in the program where the two functions in the pair are treated as +// assumed to be equal until proven otherwise. We could both use fewer +// comparisons and optimize more complex cases if we used strongly connected +// components of the call graph. +// +// * be smarter about bitcast. +// +// In order to fold functions, we will sometimes add either bitcast instructions +// or bitcast constant expressions. Unfortunately, this can confound further +// analysis since the two functions differ where one has a bitcast and the +// other doesn't. We should learn to peer through bitcasts without imposing bad +// performance properties. +// +// * don't emit aliases for Mach-O. +// +// Mach-O doesn't support aliases which means that we must avoid introducing +// them in the bitcode on architectures which don't support them, such as +// Mac OSX. There's a few approaches to this problem; +// a) teach codegen to lower global aliases to thunks on platforms which don't +// support them. +// b) always emit thunks, and create a separate thunk-to-alias pass which +// runs on ELF systems. This has the added benefit of transforming other +// thunks such as those produced by a C++ frontend into aliases when legal +// to do so. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -54,6 +77,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include <map> #include <vector> using namespace llvm; @@ -61,17 +85,33 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { - struct MergeFunctions : public ModulePass { + class MergeFunctions : public ModulePass { + public: static char ID; // Pass identification, replacement for typeid MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); + + private: + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2); + + bool equals(const BasicBlock *BB1, const BasicBlock *BB2); + bool equals(const Function *F, const Function *G); + + bool compare(const Value *V1, const Value *V2); + + const Function *LHS, *RHS; + typedef DenseMap<const Value *, unsigned long> IDMap; + IDMap Map; + DenseMap<const Function *, IDMap> Domains; + DenseMap<const Function *, unsigned long> DomainCount; + TargetData *TD; }; } char MergeFunctions::ID = 0; -static RegisterPass<MergeFunctions> -X("mergefunc", "Merge Functions"); +static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions"); ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); @@ -95,15 +135,6 @@ static unsigned long hash(const Function *F) { return ID.ComputeHash(); } -/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by -/// walking the chain of cast operands. Otherwise, returns the argument. 
-static Value* IgnoreBitcasts(Value *V) { - while (BitCastInst *BC = dyn_cast<BitCastInst>(V)) - V = BC->getOperand(0); - - return V; -} - /// isEquivalentType - any two pointers are equivalent. Otherwise, standard /// type equivalence rules apply. static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { @@ -113,6 +144,14 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return false; switch(Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release-Asserts mode. + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. + return false; + case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -123,15 +162,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { case Type::MetadataTyID: return true; - case Type::IntegerTyID: - case Type::OpaqueTyID: - // Ty1 == Ty2 would have returned true earlier. - return false; - - default: - llvm_unreachable("Unknown type!"); - return false; - case Type::PointerTyID: { const PointerType *PTy1 = cast<PointerType>(Ty1); const PointerType *PTy2 = cast<PointerType>(Ty2); @@ -154,6 +184,21 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return true; } + case Type::UnionTyID: { + const UnionType *UTy1 = cast<UnionType>(Ty1); + const UnionType *UTy2 = cast<UnionType>(Ty2); + + // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. + if (UTy1->getNumElements() != UTy2->getNumElements()) + return false; + + for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) + return false; + } + return true; + } + case Type::FunctionTyID: { const FunctionType *FTy1 = cast<FunctionType>(Ty1); const FunctionType *FTy2 = cast<FunctionType>(Ty2); @@ -236,123 +281,136 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) { return true; } -static bool compare(const Value *V, const Value *U) { - assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) && - "Must not compare basic blocks."); - - assert(isEquivalentType(V->getType(), U->getType()) && - "Two of the same operation have operands of different type."); +bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector<Value *, 8> Indices1, Indices2; + for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), + E = GEP1->idx_end(); I != E; ++I) { + Indices1.push_back(*I); + } + for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), + E = GEP2->idx_end(); I != E; ++I) { + Indices2.push_back(*I); + } + uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), + Indices1.data(), Indices1.size()); + uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), + Indices2.data(), Indices2.size()); + return Offset1 == Offset2; + } - // TODO: If the constant is an expression of F, we should accept that it's - // equal to the same expression in terms of G. - if (isa<Constant>(V)) - return V == U; + // Equivalent types aren't enough. + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; - // The caller has ensured that ValueMap[V] != U. Since Arguments are - // pre-loaded into the ValueMap, and Instructions are added as we go, we know - // that this can only be a mis-match. 
- if (isa<Instruction>(V) || isa<Argument>(V)) + if (GEP1->getNumOperands() != GEP2->getNumOperands()) return false; - if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) { - const InlineAsm *IAF = cast<InlineAsm>(V); - const InlineAsm *IAG = cast<InlineAsm>(U); - return IAF->getAsmString() == IAG->getAsmString() && - IAF->getConstraintString() == IAG->getConstraintString(); + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; } - return false; + return true; } -static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, - DenseMap<const Value *, const Value *> &ValueMap, - DenseMap<const Value *, const Value *> &SpeculationMap) { - // Speculatively add it anyways. If it's false, we'll notice a difference - // later, and this won't matter. - ValueMap[BB1] = BB2; +bool MergeFunctions::compare(const Value *V1, const Value *V2) { + if (V1 == LHS || V1 == RHS) + if (V2 == LHS || V2 == RHS) + return true; - BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); - BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); + // TODO: constant expressions in terms of LHS and RHS + if (isa<Constant>(V1)) + return V1 == V2; - do { - if (isa<BitCastInst>(FI)) { - ++FI; - continue; - } - if (isa<BitCastInst>(GI)) { - ++GI; - continue; - } + if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { + const InlineAsm *IA1 = cast<InlineAsm>(V1); + const InlineAsm *IA2 = cast<InlineAsm>(V2); + return IA1->getAsmString() == IA2->getAsmString() && + IA1->getConstraintString() == IA2->getConstraintString(); + } - if (!isEquivalentOperation(FI, GI)) - return false; + // We enumerate constants globally and arguments, basic blocks or + // instructions within the function they belong to. + const Function *Domain1 = NULL; + if (const Argument *A = dyn_cast<Argument>(V1)) { + Domain1 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) { + Domain1 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V1)) { + Domain1 = I->getParent()->getParent(); + } - if (isa<GetElementPtrInst>(FI)) { - const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI); - const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI); - if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { - // It's effectively a bitcast. 
- ++FI, ++GI; - continue; - } + const Function *Domain2 = NULL; + if (const Argument *A = dyn_cast<Argument>(V2)) { + Domain2 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) { + Domain2 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V2)) { + Domain2 = I->getParent()->getParent(); + } - // TODO: we only really care about the elements before the index - if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + if (Domain1 != Domain2) + if (Domain1 != LHS && Domain1 != RHS) + if (Domain2 != LHS && Domain2 != RHS) return false; - } - if (ValueMap[FI] == GI) { - ++FI, ++GI; - continue; - } + IDMap &Map1 = Domains[Domain1]; + unsigned long &ID1 = Map1[V1]; + if (!ID1) + ID1 = ++DomainCount[Domain1]; - if (ValueMap[FI] != NULL) - return false; + IDMap &Map2 = Domains[Domain2]; + unsigned long &ID2 = Map2[V2]; + if (!ID2) + ID2 = ++DomainCount[Domain2]; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = IgnoreBitcasts(FI->getOperand(i)); - Value *OpG = IgnoreBitcasts(GI->getOperand(i)); + return ID1 == ID2; +} - if (ValueMap[OpF] == OpG) - continue; +bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { + BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); + BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); - if (ValueMap[OpF] != NULL) + do { + if (!compare(FI, GI)) + return false; + + if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) { + const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI); + + if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; - if (OpF->getValueID() != OpG->getValueID() || - !isEquivalentType(OpF->getType(), OpG->getType())) + if (!isEquivalentGEP(GEP1, GEP2)) + return false; + } else { + if (!isEquivalentOperation(FI, GI)) return false; - if (isa<PHINode>(FI)) { - if (SpeculationMap[OpF] == NULL) - SpeculationMap[OpF] = OpG; - else if (SpeculationMap[OpF] != OpG) - return false; - continue; - } else if (isa<BasicBlock>(OpF)) { - assert(isa<TerminatorInst>(FI) && - "BasicBlock referenced by non-Terminator non-PHI"); - // This call changes the ValueMap, hence we can't use - // Value *& = ValueMap[...] - if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap, - SpeculationMap)) - return false; - } else { + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + Value *OpF = FI->getOperand(i); + Value *OpG = GI->getOperand(i); + if (!compare(OpF, OpG)) return false; - } - ValueMap[OpF] = OpG; + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) + return false; + } } - ValueMap[FI] = GI; ++FI, ++GI; } while (FI != FE && GI != GE); return FI == FE && GI == GE; } -static bool equals(const Function *F, const Function *G) { +bool MergeFunctions::equals(const Function *F, const Function *G) { // We need to recheck everything, but check the things that weren't included // in the hash first. 
@@ -382,27 +440,46 @@ static bool equals(const Function *F, const Function *G) { if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; - DenseMap<const Value *, const Value *> ValueMap; - DenseMap<const Value *, const Value *> SpeculationMap; - ValueMap[F] = G; - assert(F->arg_size() == G->arg_size() && "Identical functions have a different number of args."); - for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), - fe = F->arg_end(); fi != fe; ++fi, ++gi) - ValueMap[fi] = gi; + LHS = F; + RHS = G; - if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap, - SpeculationMap)) - return false; + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) { + if (!compare(fi, gi)) + llvm_unreachable("Arguments repeat"); + } - for (DenseMap<const Value *, const Value *>::iterator - I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) { - if (ValueMap[I->first] != I->second) + SmallVector<const BasicBlock *, 8> FBBs, GBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F. + FBBs.push_back(&F->getEntryBlock()); + GBBs.push_back(&G->getEntryBlock()); + VisitedBBs.insert(FBBs[0]); + while (!FBBs.empty()) { + const BasicBlock *FBB = FBBs.pop_back_val(); + const BasicBlock *GBB = GBBs.pop_back_val(); + if (!compare(FBB, GBB) || !equals(FBB, GBB)) { + Domains.clear(); + DomainCount.clear(); return false; + } + const TerminatorInst *FTI = FBB->getTerminator(); + const TerminatorInst *GTI = GBB->getTerminator(); + assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); + for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(FTI->getSuccessor(i))) + continue; + FBBs.push_back(FTI->getSuccessor(i)); + GBBs.push_back(GTI->getSuccessor(i)); + } } + Domains.clear(); + DomainCount.clear(); return true; } @@ -476,20 +553,32 @@ static LinkageCategory categorize(const Function *F) { } static void ThunkGToF(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) + TheIter.getUse().set(BitcastF); + } + } + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - std::vector<Value *> Args; + SmallVector<Value *, 16> Args; unsigned i = 0; const FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - if (FFTy->getParamType(i) == AI->getType()) + if (FFTy->getParamType(i) == AI->getType()) { Args.push_back(AI); - else { - Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); - Args.push_back(BCI); + } else { + Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); } ++i; } @@ -510,8 +599,6 @@ static void ThunkGToF(Function *F, Function *G) { NewG->takeName(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); - - // TODO: look at direct callers to G and make them all direct callers to F. 
} static void AliasGToF(Function *F, Function *G) { @@ -542,67 +629,66 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { } switch (catF) { + case ExternalStrong: + switch (catG) { case ExternalStrong: - switch (catG) { - case ExternalStrong: - case ExternalWeak: - ThunkGToF(F, G); - break; - case Internal: - if (G->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - } + case ExternalWeak: + ThunkGToF(F, G); break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; - case ExternalWeak: { - assert(catG == ExternalWeak); + case ExternalWeak: { + assert(catG == ExternalWeak); - // Make them both thunks to the same internal function. - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + // Make them both thunks to the same internal function. + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); - ThunkGToF(F, G); - ThunkGToF(F, H); + ThunkGToF(F, G); + ThunkGToF(F, H); - F->setLinkage(GlobalValue::InternalLinkage); - } break; + F->setLinkage(GlobalValue::InternalLinkage); + } break; - case Internal: - switch (catG) { - case ExternalStrong: - llvm_unreachable(0); - // fall-through - case ExternalWeak: - if (F->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - case Internal: { - bool addrTakenF = F->hasAddressTaken(); - bool addrTakenG = G->hasAddressTaken(); - if (!addrTakenF && addrTakenG) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + case Internal: + switch (catG) { + case ExternalStrong: + llvm_unreachable(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } - if (addrTakenF && addrTakenG) { - ThunkGToF(F, G); - } else { - assert(!addrTakenG); - AliasGToF(F, G); - } - } break; + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); } - break; + } break; + } break; } ++NumFunctionsMerged; @@ -619,22 +705,20 @@ bool MergeFunctions::runOnModule(Module &M) { std::map<unsigned long, std::vector<Function *> > FnMap; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration() || F->isIntrinsic()) + if (F->isDeclaration()) continue; FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in - // callgraph order. Constructing the CFG probably isn't cheaper than just - // running in a loop, unless it happened to already be available. 
+ TD = getAnalysisIfAvailable<TargetData>(); bool LocalChanged; do { LocalChanged = false; DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { + I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 310e4a2..6bc8e66 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -229,6 +229,12 @@ static bool StripDebugInfo(Module &M) { NMD->eraseFromParent(); } + NMD = M.getNamedMetadata("llvm.dbg.lv"); + if (NMD) { + Changed = true; + NMD->eraseFromParent(); + } + unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index bd06499..c7b04a4 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -51,7 +51,7 @@ static inline unsigned getComplexity(Value *V) { /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class VISIBILITY_HIDDEN InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: @@ -65,7 +65,7 @@ public: }; /// InstCombiner - The -instcombine pass. -class VISIBILITY_HIDDEN InstCombiner +class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index eb7628e..b0137c4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -442,7 +442,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " << CI); + " to avoid cast: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); return ReplaceInstUsesWith(CI, Res); @@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return commonPointerCastTransforms(CI); } +/// OptimizeVectorResize - This input value (which is known to have vector type) +/// is being zero extended or truncated to the specified vector type. Try to +/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// +/// The source and destination vector types may have different element types. +static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, + InstCombiner &IC) { + // We can only do this optimization if the output is a multiple of the input + // element size, or the input is a multiple of the output element size. + // Convert the input type to have the same element type as the output. 
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + + if (SrcTy->getElementType() != DestTy->getElementType()) { + // The input types don't need to be identical, but for now they must be the + // same size. There is no specific reason we couldn't handle things like + // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten + // there yet. + if (SrcTy->getElementType()->getPrimitiveSizeInBits() != + DestTy->getElementType()->getPrimitiveSizeInBits()) + return 0; + + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); + InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + } + + // Now that the element types match, get the shuffle mask and RHS of the + // shuffle to use, which depends on whether we're increasing or decreasing the + // size of the input. + SmallVector<Constant*, 16> ShuffleMask; + Value *V2; + const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + + if (SrcTy->getNumElements() > DestTy->getNumElements()) { + // If we're shrinking the number of elements, just shuffle in the low + // elements from the input and use undef as the second shuffle input. + V2 = UndefValue::get(SrcTy); + for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + } else { + // If we're increasing the number of elements, shuffle in all of the + // elements from InVal and fill the rest of the result elements with zeros + // from a constant zero. + V2 = Constant::getNullValue(SrcTy); + unsigned SrcElts = SrcTy->getNumElements(); + for (unsigned i = 0, e = SrcElts; i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + // The excess elements reference the first element of the zero input. + ShuffleMask.append(DestTy->getNumElements()-SrcElts, + ConstantInt::get(Int32Ty, SrcElts)); + } + + Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); + return new ShuffleVectorInst(InVal, V2, Mask); +} + + Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. @@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } + + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + cast<VectorType>(DestTy), *this)) + return I; + } } if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9d88621..9100a85 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -22,7 +22,7 @@ namespace llvm { /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. 
-class VISIBILITY_HIDDEN InstCombineWorklist { +class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 5778864..1a3b10c 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_library(LLVMScalarOpts SimplifyCFGPass.cpp SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp + Sink.cpp TailDuplication.cpp TailRecursionElimination.cpp ) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 65b34b1..ca8ab49 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -868,7 +868,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, const Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. @@ -1132,8 +1132,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; - uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); @@ -1604,7 +1604,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } if (!NeedToSplit.empty()) { - toSplit.append(NeedToSplit.size(), NeedToSplit.front()); + toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index cf3d16f..86ea3eb 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -107,11 +107,13 @@ namespace { class RegUseTracker { typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - RegUsesTy RegUses; + RegUsesTy RegUsesMap; SmallVector<const SCEV *, 16> RegSequence; public: void CountRegister(const SCEV *Reg, size_t LUIdx); + void DropRegister(const SCEV *Reg, size_t LUIdx); + void DropUse(size_t LUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -132,7 +134,7 @@ public: void RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { std::pair<RegUsesTy::iterator, bool> Pair = - RegUses.insert(std::make_pair(Reg, RegSortData())); + RegUsesMap.insert(std::make_pair(Reg, RegSortData())); RegSortData &RSD = Pair.first->second; if (Pair.second) RegSequence.push_back(Reg); @@ -140,11 +142,28 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { RSD.UsedByIndices.set(LUIdx); } +void +RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { + RegUsesTy::iterator It = RegUsesMap.find(Reg); + assert(It != RegUsesMap.end()); + RegSortData &RSD = It->second; + assert(RSD.UsedByIndices.size() > LUIdx); + RSD.UsedByIndices.reset(LUIdx); +} + +void +RegUseTracker::DropUse(size_t LUIdx) { + // Remove the use index from every register's use list. 
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); + I != E; ++I) + I->second.UsedByIndices.reset(LUIdx); +} + bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUses.count(Reg)) return false; + if (!RegUsesMap.count(Reg)) return false; const SmallBitVector &UsedByIndices = - RegUses.find(Reg)->second.UsedByIndices; + RegUsesMap.find(Reg)->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -152,13 +171,13 @@ RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { } const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { - RegUsesTy::const_iterator I = RegUses.find(Reg); - assert(I != RegUses.end() && "Unknown register!"); + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + assert(I != RegUsesMap.end() && "Unknown register!"); return I->second.UsedByIndices; } void RegUseTracker::clear() { - RegUses.clear(); + RegUsesMap.clear(); RegSequence.clear(); } @@ -188,6 +207,8 @@ struct Formula { unsigned getNumRegs() const; const Type *getType() const; + void DeleteBaseReg(const SCEV *&S); + bool referencesReg(const SCEV *S) const; bool hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const; @@ -291,6 +312,13 @@ const Type *Formula::getType() const { 0; } +/// DeleteBaseReg - Delete the given base reg from the BaseRegs list. +void Formula::DeleteBaseReg(const SCEV *&S) { + if (&S != &BaseRegs.back()) + std::swap(S, BaseRegs.back()); + BaseRegs.pop_back(); +} + /// referencesReg - Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { return S == ScaledReg || @@ -326,6 +354,13 @@ void Formula::print(raw_ostream &OS) const { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } + if (AM.HasBaseReg && BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: HasBaseReg**"; + } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: !HasBaseReg**"; + } if (AM.Scale != 0) { if (!First) OS << " + "; else First = false; OS << AM.Scale << "*reg("; @@ -345,8 +380,7 @@ void Formula::dump() const { /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(AR->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -354,8 +388,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -363,8 +396,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { /// without changing its value. 
static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -432,14 +464,14 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, bool Found = false; for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); I != E; ++I) { + const SCEV *S = *I; if (!Found) - if (const SCEV *Q = getExactSDiv(*I, RHS, SE, + if (const SCEV *Q = getExactSDiv(S, RHS, SE, IgnoreSignificantBits)) { - Ops.push_back(Q); + S = Q; Found = true; - continue; } - Ops.push_back(*I); + Ops.push_back(S); } return Found ? SE.getMulExpr(Ops) : 0; } @@ -810,8 +842,7 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), - LUIdx(~size_t(0)), Offset(0) {} + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. @@ -934,7 +965,10 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true) {} + bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); + void DeleteFormula(Formula &F); + void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); void check() const; @@ -942,6 +976,16 @@ public: void dump() const; }; +/// HasFormula - Test whether this use as a formula which has the same +/// registers as the given formula. +bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { + SmallVector<const SCEV *, 2> Key = F.BaseRegs; + if (F.ScaledReg) Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for uniquifying. + std::sort(Key.begin(), Key.end()); + return Uniquifier.count(Key); +} + /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { @@ -972,6 +1016,33 @@ bool LSRUse::InsertFormula(const Formula &F) { return true; } +/// DeleteFormula - Remove the given formula from this use's list. +void LSRUse::DeleteFormula(Formula &F) { + if (&F != &Formulae.back()) + std::swap(F, Formulae.back()); + Formulae.pop_back(); + assert(!Formulae.empty() && "LSRUse has no formulae left!"); +} + +/// RecomputeRegs - Recompute the Regs field, and update RegUses. +void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { + // Now that we've filtered out some formulae, recompute the Regs set. + SmallPtrSet<const SCEV *, 4> OldRegs = Regs; + Regs.clear(); + for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(), + E = Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + } + + // Update the RegTracker. + for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(), + E = OldRegs.end(); I != E; ++I) + if (!Regs.count(*I)) + RegUses.DropRegister(*I, LUIdx); +} + void LSRUse::print(raw_ostream &OS) const { OS << "LSR Use: Kind="; switch (Kind) { @@ -1091,6 +1162,13 @@ static bool isAlwaysFoldable(int64_t BaseOffs, AM.HasBaseReg = HasBaseReg; AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + // Canonicalize a scale of 1 to a base register if the formula doesn't + // already have a base register. 
+ if (!AM.HasBaseReg && AM.Scale == 1) { + AM.Scale = 0; + AM.HasBaseReg = true; + } + return isLegalUse(AM, Kind, AccessTy, TLI); } @@ -1186,7 +1264,7 @@ class LSRInstance { void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); - bool OptimizeLoopTermCond(); + void OptimizeLoopTermCond(); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1200,13 +1278,17 @@ class LSRInstance { typedef DenseMap<const SCEV *, size_t> UseMapTy; UseMapTy UseMap; - bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, const Type *AccessTy); + void DeleteUse(LSRUse &LU); + + LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); + public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); @@ -1227,6 +1309,8 @@ public: void GenerateAllReuseFormulae(); void FilterOutUndesirableDedicatedRegisters(); + + size_t EstimateSearchSpaceComplexity() const; void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1375,6 +1459,7 @@ void LSRInstance::OptimizeShadowIV() { /* Remove cast operation */ ShadowUse->replaceAllUsesWith(NewPH); ShadowUse->eraseFromParent(); + Changed = true; break; } } @@ -1382,8 +1467,7 @@ void LSRInstance::OptimizeShadowIV() { /// FindIVUserForCond - If Cond has an operand that is an expression of an IV, /// set the IV user and stride information and return true, otherwise return /// false. -bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse) { +bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) if (UI->getUser() == Cond) { // NOTE: we could handle setcc instructions with multiple uses here, but @@ -1555,7 +1639,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { /// OptimizeLoopTermCond - Change loop terminating condition to use the /// postinc iv when possible. -bool +void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet<Instruction *, 4> PostIncs; @@ -1621,13 +1705,13 @@ LSRInstance::OptimizeLoopTermCond() { } if (const SCEVConstant *D = dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { + const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (D->getValue()->isOne() || - D->getValue()->isAllOnesValue()) + if (C->isOne() || C->isAllOnesValue()) goto decline_post_inc; // Avoid weird situations. - if (D->getValue()->getValue().getMinSignedBits() >= 64 || - D->getValue()->getValue().isMinSignedValue()) + if (C->getValue().getMinSignedBits() >= 64 || + C->getValue().isMinSignedValue()) goto decline_post_inc; // Without TLI, assume that any stride might be valid, and so any // use might be shared. @@ -1636,7 +1720,7 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
const Type *AccessTy = getAccessType(UI->getUser()); TargetLowering::AddrMode AM; - AM.Scale = D->getValue()->getSExtValue(); + AM.Scale = C->getSExtValue(); if (TLI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; AM.Scale = -AM.Scale; @@ -1691,12 +1775,13 @@ LSRInstance::OptimizeLoopTermCond() { else if (BB != IVIncInsertPos->getParent()) IVIncInsertPos = BB->getTerminator(); } - - return Changed; } +/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// at the given offset and other details. If so, update the use and +/// return true. bool -LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; @@ -1709,12 +1794,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, return false; // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true, + if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true, + if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; NewMaxOffset = NewOffset; @@ -1753,7 +1838,7 @@ LSRInstance::getUse(const SCEV *&Expr, // A use already existed with this base. size_t LUIdx = P.first->second; LSRUse &LU = Uses[LUIdx]; - if (reconcileNewOffset(LU, Offset, Kind, AccessTy)) + if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) // Reuse this use. return std::make_pair(LUIdx, Offset); } @@ -1774,6 +1859,47 @@ LSRInstance::getUse(const SCEV *&Expr, return std::make_pair(LUIdx, Offset); } +/// DeleteUse - Delete the given use from the Uses list. +void LSRInstance::DeleteUse(LSRUse &LU) { + if (&LU != &Uses.back()) + std::swap(LU, Uses.back()); + Uses.pop_back(); +} + +/// FindUseWithFormula - Look for a use distinct from OrigLU which is has +/// a formula that has the same registers as the given formula. +LSRUse * +LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, + const LSRUse &OrigLU) { + // Search all uses for the formula. This could be more clever. Ignore + // ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. 
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + if (&LU != &OrigLU && + LU.Kind != LSRUse::ICmpZero && + LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.HasFormulaWithSameRegs(OrigF)) { + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseRegs == OrigF.BaseRegs && + F.ScaledReg == OrigF.ScaledReg && + F.AM.BaseGV == OrigF.AM.BaseGV && + F.AM.Scale == OrigF.AM.Scale && + LU.Kind) { + if (F.AM.BaseOffs == 0) + return &LU; + break; + } + } + } + } + + return 0; +} + void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; @@ -1867,6 +1993,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (NV == LF.OperandValToReplace) { CI->setOperand(1, CI->getOperand(0)); CI->setOperand(0, NV); + NV = CI->getOperand(1); + Changed = true; } // x == y --> x - y == 0 @@ -1901,6 +2029,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { DEBUG(print_fixups(dbgs())); } +/// InsertInitialFormula - Insert a formula for the given expression into +/// the given use, separating out loop-variant portions from loop-invariant +/// and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; @@ -1909,6 +2040,8 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { assert(Inserted && "Initial formula already exists!"); (void)Inserted; } +/// InsertSupplementalFormula - Insert a simple single-register formula for +/// the given expression into the given use. void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { @@ -2265,8 +2398,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, /// GenerateScales - Generate stride factor reuse formulae by making use of /// scaled-offset address modes, for example. -void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. const Type *IntTy = Base.getType(); if (!IntTy) return; @@ -2312,8 +2444,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, // TODO: This could be optimized to avoid all the copying. Formula F = Base; F.ScaledReg = Quotient; - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); + F.DeleteBaseReg(F.BaseRegs[i]); (void)InsertFormula(LU, LUIdx, F); } } @@ -2321,8 +2452,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, } /// GenerateTruncates - Generate reuse formulae from different IV types. -void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { // This requires TargetLowering to tell us which truncates are free. if (!TLI) return; @@ -2479,7 +2609,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - Formula F = LU.Formulae[L]; + const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. 
if (F.ScaledReg == OrigReg) { int64_t Offs = (uint64_t)F.AM.BaseOffs + @@ -2527,10 +2657,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) - if (C->getValue()->getValue().isNegative() != - (NewF.AM.BaseOffs < 0) && - C->getValue()->getValue().abs() - .ule(abs64(NewF.AM.BaseOffs))) + if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt( + abs64(NewF.AM.BaseOffs)) && + (C->getValue()->getValue() + + NewF.AM.BaseOffs).countTrailingZeros() >= + CountTrailingZeros_64(NewF.AM.BaseOffs)) goto skip_formula; // Ok, looks good. @@ -2579,7 +2710,7 @@ LSRInstance::GenerateAllReuseFormulae() { /// by other uses, pick the best one and delete the others. void LSRInstance::FilterOutUndesirableDedicatedRegisters() { #ifndef NDEBUG - bool Changed = false; + bool ChangedFormulae = false; #endif // Collect the best formula for each unique set of shared registers. This @@ -2591,10 +2722,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; FormulaSorter Sorter(L, LU, SE, DT); + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); - // Clear out the set of used regs; it will be recomputed. - LU.Regs.clear(); - + bool Any = false; for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; ++FIdx) { Formula &F = LU.Formulae[FIdx]; @@ -2619,62 +2749,200 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Formula &Best = LU.Formulae[P.first->second]; if (Sorter.operator()(F, Best)) std::swap(F, Best); - DEBUG(dbgs() << "Filtering out "; F.print(dbgs()); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" - " in favor of "; Best.print(dbgs()); + " in favor of formula "; Best.print(dbgs()); dbgs() << '\n'); #ifndef NDEBUG - Changed = true; + ChangedFormulae = true; #endif - std::swap(F, LU.Formulae.back()); - LU.Formulae.pop_back(); + LU.DeleteFormula(F); --FIdx; --NumForms; + Any = true; continue; } - if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); - LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + + // Now that we've filtered out some formulae, recompute the Regs set. + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. BestFormulae.clear(); } - DEBUG(if (Changed) { + DEBUG(if (ChangedFormulae) { dbgs() << "\n" "After filtering out undesirable candidates:\n"; print_uses(dbgs()); }); } +// This is a rough guess that seems to work fairly well. +static const size_t ComplexityLimit = UINT16_MAX; + +/// EstimateSearchSpaceComplexity - Estimate the worst-case number of +/// solutions the solver might have to consider. It almost never considers +/// this many solutions because it prune the search space, but the pruning +/// isn't always sufficient. +size_t LSRInstance::EstimateSearchSpaceComplexity() const { + uint32_t Power = 1; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + size_t FSize = I->Formulae.size(); + if (FSize >= ComplexityLimit) { + Power = ComplexityLimit; + break; + } + Power *= FSize; + if (Power >= ComplexityLimit) + break; + } + return Power; +} + /// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of /// formulae to choose from, use some rough heuristics to prune down the number /// of formulae. 
This keeps the main solver from taking an extraordinary amount /// of time in some worst-case scenarios. void LSRInstance::NarrowSearchSpaceUsingHeuristics() { - // This is a rough guess that seems to work fairly well. - const size_t Limit = UINT16_MAX; + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); - SmallPtrSet<const SCEV *, 4> Taken; - for (;;) { - // Estimate the worst-case number of solutions we might consider. We almost - // never consider this many solutions because we prune the search space, - // but the pruning isn't always sufficient. - uint32_t Power = 1; - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - size_t FSize = I->Formulae.size(); - if (FSize >= Limit) { - Power = Limit; - break; + DEBUG(dbgs() << "Narrowing the search space by eliminating formulae " + "which use a superset of registers used by other " + "formulae.\n"); + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + bool Any = false; + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + // Look for a formula with a constant or GV in a register. If the use + // also has a formula with that same value in an immediate field, + // delete the one that uses a register. + for (SmallVectorImpl<const SCEV *>::const_iterator + I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { + Formula NewF = F; + NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) + if (!F.AM.BaseGV) { + Formula NewF = F; + NewF.AM.BaseGV = GV; + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } + } + } } - Power *= FSize; - if (Power >= Limit) - break; + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); } - if (Power < Limit) - break; + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } + + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by assuming that uses " + "separated by a constant offset will use the same " + "registers.\n"); + + // This is especially useful for unrolled loops. + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { + if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { + if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, + /*HasBaseReg=*/false, + LU.Kind, LU.AccessTy)) { + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); + dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Delete formulae from the new use which are no longer legal. 
+ bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(F.AM, + LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + DEBUG(errs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + + // Delete the old use. + DeleteUse(LU); + --LUIdx; + --NumUses; + break; + } + } + } + } + } + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } + + // With all other options exhausted, loop until the system is simple + // enough to handle. + SmallPtrSet<const SCEV *, 4> Taken; + while (EstimateSearchSpaceComplexity() >= ComplexityLimit) { // Ok, we have too many of formulae on our hands to conveniently handle. // Use a rough heuristic to thin out the list. + DEBUG(dbgs() << "The search space is too complex.\n"); // Pick the register which is used by the most LSRUses, which is likely // to be a good reuse register candidate. @@ -2702,28 +2970,26 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { // In any use with formulae which references this register, delete formulae // which don't reference it. - for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - LSRUse &LU = *I; + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; if (!LU.Regs.count(Best)) continue; - // Clear out the set of used regs; it will be recomputed. - LU.Regs.clear(); - + bool Any = false; for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { Formula &F = LU.Formulae[i]; if (!F.referencesReg(Best)) { DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); - std::swap(LU.Formulae.back(), F); - LU.Formulae.pop_back(); + LU.DeleteFormula(F); --e; --i; + Any = true; + assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?"); continue; } - - if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); - LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); } DEBUG(dbgs() << "After pre-selection:\n"; @@ -2810,11 +3076,14 @@ retry: // If none of the formulae had all of the required registers, relax the // constraint so that we don't exclude all formulae. if (!AnySatisfiedReqRegs) { + assert(!ReqRegs.empty() && "Solver failed even without required registers"); ReqRegs.clear(); goto retry; } } +/// Solve - Choose one formula from each use. Return the results in the given +/// Solution vector. void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { SmallVector<const Formula *, 8> Workspace; Cost SolutionCost; @@ -2824,6 +3093,7 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { DenseSet<const SCEV *> VisitedRegs; Workspace.reserve(Uses.size()); + // SolveRecurse does all the work. 
SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); @@ -2839,17 +3109,8 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { Solution[i]->print(dbgs()); dbgs() << '\n'; }); -} -/// getImmediateDominator - A handy utility for the specific DominatorTree -/// query that we need here. -/// -static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { - DomTreeNode *Node = DT.getNode(BB); - if (!Node) return 0; - Node = Node->getIDom(); - if (!Node) return 0; - return Node->getBlock(); + assert(Solution.size() == Uses.size() && "Malformed solution!"); } /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up @@ -2865,9 +3126,11 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; BasicBlock *IDom; - for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { - IDom = getImmediateDominator(Rung, DT); - if (!IDom) return IP; + for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { + if (!Rung) return IP; + Rung = Rung->getIDom(); + if (!Rung) return IP; + IDom = Rung->getBlock(); // Don't climb into a loop though. const Loop *IDomLoop = LI.getLoopFor(IDom); @@ -2891,7 +3154,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) - BetterPos = next(BasicBlock::iterator(Inst)); + BetterPos = llvm::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; @@ -2957,6 +3220,8 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, return IP; } +/// Expand - Emit instructions for the leading candidate expression for this +/// LSRUse (this is called "expanding"). Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -3212,6 +3477,8 @@ void LSRInstance::Rewrite(const LSRFixup &LF, DeadInsts.push_back(LF.OperandValToReplace); } +/// ImplementSolution - Rewrite all the fixup locations with new values, +/// following the chosen solution. void LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P) { @@ -3224,10 +3491,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. - for (size_t i = 0, e = Fixups.size(); i != e; ++i) { - size_t LUIdx = Fixups[i].LUIdx; + for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + const LSRFixup &Fixup = *I; - Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P); + Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); Changed = true; } @@ -3256,13 +3524,11 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); dbgs() << ":\n"); - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast operation. + // First, perform some low-level loop optimizations. OptimizeShadowIV(); + OptimizeLoopTermCond(); - // Change loop terminating condition to use the postinc iv when possible. - Changed |= OptimizeLoopTermCond(); - + // Start collecting data and preparing for the solver. 
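Looking back at the HoistInsertPosition hunk above: the ad-hoc getImmediateDominator helper is gone, and the code now climbs the dominator tree directly through DomTreeNode. In isolation, that walking pattern looks roughly like the following sketch; the predicate and function name are made up for illustration.

#include "llvm/Analysis/Dominators.h"
using namespace llvm;

// Walk up the immediate-dominator chain starting at BB and return the first
// block that satisfies Pred, or null if none does. This mirrors the loop
// structure now used by HoistInsertPosition.
static BasicBlock *climbDominators(BasicBlock *BB, DominatorTree &DT,
                                   bool (*Pred)(BasicBlock *)) {
  for (DomTreeNode *Node = DT.getNode(BB); Node; Node = Node->getIDom()) {
    BasicBlock *Candidate = Node->getBlock();
    if (Pred(Candidate))
      return Candidate;
  }
  return 0;
}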
CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); @@ -3283,7 +3549,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) SmallVector<const Formula *, 8> Solution; Solve(Solution); - assert(Solution.size() == Uses.size() && "Malformed solution!"); // Release memory that is no longer needed. Factors.clear(); @@ -3333,9 +3598,8 @@ void LSRInstance::print_fixups(raw_ostream &OS) const { OS << "LSR is examining the following fixup sites:\n"; for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), E = Fixups.end(); I != E; ++I) { - const LSRFixup &LF = *I; dbgs() << " "; - LF.print(OS); + I->print(OS); OS << '\n'; } } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b621e8d..9744100 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -58,13 +58,20 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I) { +static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst::Create(TrapFn, "", I); + } new UnreachableInst(I->getContext(), I); // All instructions after this are dead. @@ -118,7 +125,8 @@ static bool MarkAliveBlocks(BasicBlock *BB, // though. ++BBI; if (!isa<UnreachableInst>(BBI)) { - ChangeToUnreachable(BBI); + // Don't insert a call to llvm.trap right before the unreachable. 
+ ChangeToUnreachable(BBI, false); Changed = true; } break; @@ -134,7 +142,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI); + ChangeToUnreachable(SI, true); Changed = true; break; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b053cfc..7414be7 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -558,10 +558,13 @@ struct MemCmpOpt : public LibCallOptimization { if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); - if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS - Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); - Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv"); - return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType()); + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); } // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp new file mode 100644 index 0000000..b88ba48 --- /dev/null +++ b/lib/Transforms/Scalar/Sink.cpp @@ -0,0 +1,267 @@ +//===-- Sink.cpp - Code Sinking -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. 
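Returning briefly to the MemCmpOpt change above: C's memcmp compares bytes as unsigned char, so the one-byte expansion has to zero-extend each byte before subtracting in the wider type; subtracting in i8 and then sign-extending can produce the wrong sign. A small illustrative sketch (not the pass's code):

#include <cstdint>

// memcmp compares bytes as unsigned char. With s1[0] == 0x01 and s2[0] == 0xFF,
// the result must be negative (1 < 255).
int new_one_byte_memcmp(const unsigned char *s1, const unsigned char *s2) {
  // New lowering: zero-extend each byte, then subtract in the wide type.
  return (int)(uint32_t)s1[0] - (int)(uint32_t)s2[0]; // 1 - 255 == -254, correct
}

int old_one_byte_memcmp(const unsigned char *s1, const unsigned char *s2) {
  // Old lowering: subtract in i8, then sign-extend the 8-bit difference.
  int8_t diff = (int8_t)(s1[0] - s2[0]); // 0x01 - 0xFF wraps to 0x02 in i8
  return (int)diff;                      // +2, wrong sign
}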
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sink" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumSunk, "Number of instructions sunk"); + +namespace { + class Sinking : public FunctionPass { + DominatorTree *DT; + LoopInfo *LI; + AliasAnalysis *AA; + + public: + static char ID; // Pass identification + Sinking() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + } + private: + bool ProcessBlock(BasicBlock &BB); + bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores); + bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const; + }; +} // end anonymous namespace + +char Sinking::ID = 0; +static RegisterPass<Sinking> +X("sink", "Code sinking"); + +FunctionPass *llvm::createSinkingPass() { return new Sinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified value +/// occur in blocks dominated by the specified block. +bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, + BasicBlock *BB) const { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (Value::use_iterator I = Inst->use_begin(), + E = Inst->use_end(); I != E; ++I) { + // Determine the block of the use. + Instruction *UseInst = cast<Instruction>(*I); + BasicBlock *UseBlock = UseInst->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo()); + UseBlock = PN->getIncomingBlock(Num); + } + // Check that it dominates. + if (!DT->dominates(BB, UseBlock)) + return false; + } + return true; +} + +bool Sinking::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool Sinking::ProcessBlock(BasicBlock &BB) { + // Can't sink anything out of a block that has less than two successors. + if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an unreachable + // loop there may be nowhere to stop. 
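Before the implementation continues below, a source-level before/after sketch of what the pass aims for (plain illustrative C++, not LLVM IR): an instruction whose result is only needed on one side of a branch gets moved into that successor, so the other path never computes it.

// Before sinking: the multiply executes even when cond is false,
// although its result is only needed on the true path.
int before(int a, int b, bool cond) {
  int x = a * b;        // candidate for sinking: only used under 'cond'
  if (cond)
    return x + 1;
  return 0;
}

// After sinking: the multiply has moved into the block that uses it,
// so the 'cond == false' path no longer pays for it.
int after(int a, int b, bool cond) {
  if (cond) {
    int x = a * b;      // sunk into the successor containing all uses
    return x + 1;
  }
  return 0;
}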
+ if (!DT->isReachableFromEntry(&BB)) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up. Remember if we saw a store. + BasicBlock::iterator I = BB.end(); + --I; + bool ProcessedBegin = false; + SmallPtrSet<Instruction *, 8> Stores; + do { + Instruction *Inst = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == BB.begin(); + if (!ProcessedBegin) + --I; + + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + if (SinkInstruction(Inst, Stores)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, + SmallPtrSet<Instruction *, 8> &Stores) { + if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { + if (L->isVolatile()) return false; + + Value *Ptr = L->getPointerOperand(); + unsigned Size = AA->getTypeStoreSize(L->getType()); + for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), + E = Stores.end(); I != E; ++I) + if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod) + return false; + } + + if (Inst->mayWriteToMemory()) { + Stores.insert(Inst); + return false; + } + + return Inst->isSafeToSpeculativelyExecute(); +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool Sinking::SinkInstruction(Instruction *Inst, + SmallPtrSet<Instruction *, 8> &Stores) { + // Check if it's safe to move the instruction. + if (!isSafeToMove(Inst, AA, Stores)) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. + BasicBlock *ParentBlock = Inst->getParent(); + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + BasicBlock *SuccToSinkTo = 0; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // x = computation + // if () {} else {} + // use x + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Instructions can only be sunk if all their uses are in blocks + // dominated by one of the successors. + // Look at all the successors and decide which one + // we should sink to. + for (succ_iterator SI = succ_begin(ParentBlock), + E = succ_end(ParentBlock); SI != E; ++SI) { + if (AllUsesDominatedByBlock(Inst, *SI)) { + SuccToSinkTo = *SI; + break; + } + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return false; + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. 
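The isSafeToMove helper above refuses to sink a load below any store that may modify the loaded address; that is exactly the query it sends to AliasAnalysis::getModRefInfo. A small hypothetical example of the hazard it guards against:

// Sinking the load below the store is only legal if *p and *q cannot alias.
int sinkLoadHazard(int *p, int *q, bool cond) {
  int v = *p;           // load under consideration for sinking
  *q = 0;               // a store that sits below the load (already collected
                        // in Stores by the bottom-up walk)
  if (cond)
    return v;           // if the load were sunk here and p == q, v would
                        // observe the store's value instead of the original
  return 0;
}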
+ if (Inst->getParent() == SuccToSinkTo) + return false; + + DEBUG(dbgs() << "Sink instr " << *Inst); + DEBUG(dbgs() << "to block "; + WriteAsOperand(dbgs(), SuccToSinkTo, false)); + + // If the block has multiple predecessors, this would introduce computation on + // a path that it doesn't already exist. We could split the critical edge, + // but for now we just punt. + // FIXME: Split critical edges if not backedges. + if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. + if (!Inst->isSafeToSpeculativelyExecute()) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); + } + + // Determine where to insert into. Skip phi nodes. + BasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos)) + ++InsertPos; + + // Move the instruction. + Inst->moveBefore(InsertPos); + return true; +} diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 8ad66dd..6d4fe4b 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -344,7 +344,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { DILocation OrigLocation = ILoc.getOrigLocation(); MDNode *NewLoc = TheCallMD; if (OrigLocation.Verify()) - NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD); + NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD); Value *MDVs[] = { InsnMD->getOperand(0), // Line diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index f181f3a..13f0a28 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -861,7 +861,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Find the nearest store that has a lower than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::pair<unsigned, StoreInst*>(LoadIdx, 0), + std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), StoreIndexSearchPredicate()); // If there is no store before this load, then we can't promote this load. 
@@ -886,7 +886,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.getNode()) + if (!DIVar.Verify()) return; if (!DIF) diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 25d50db..f4bdb527 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/AlignOf.h" @@ -20,40 +19,17 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; -/// BBInfo - Per-basic block information used internally by SSAUpdater. -/// The predecessors of each block are cached here since pred_iterator is -/// slow and we need to iterate over the blocks at least a few times. -class SSAUpdater::BBInfo { -public: - BasicBlock *BB; // Back-pointer to the corresponding block. - Value *AvailableVal; // Value to use in this block. - BBInfo *DefBB; // Block that defines the available value. - int BlkNum; // Postorder number. - BBInfo *IDom; // Immediate dominator. - unsigned NumPreds; // Number of predecessor blocks. - BBInfo **Preds; // Array[NumPreds] of predecessor blocks. - PHINode *PHITag; // Marker for existing PHIs that match. - - BBInfo(BasicBlock *ThisBB, Value *V) - : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), - NumPreds(0), Preds(0), PHITag(0) { } -}; - -typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; - typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static BBMapTy *getBBMap(void *BM) { - return static_cast<BBMapTy*>(BM); -} - SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); @@ -105,9 +81,7 @@ static bool IsEquivalentPHI(PHINode *PHI, /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(BM == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(BM == 0 && "Unexpected Internal State"); return Res; } @@ -231,427 +205,126 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. -Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (Value *V = AvailableVals[BB]) - return V; - - // Pool allocation used internally by GetValueAtEndOfBlock. - BumpPtrAllocator Allocator; - BBMapTy BBMapObj; - BM = &BBMapObj; - - SmallVector<BBInfo*, 100> BlockList; - BuildBlockList(BB, &BlockList, &Allocator); - - // Special case: bail out if BB is unreachable. 
- if (BlockList.size() == 0) { - BM = 0; - return UndefValue::get(PrototypeValue->getType()); - } - - FindDominators(&BlockList); - FindPHIPlacement(&BlockList); - FindAvailableVals(&BlockList); - - BM = 0; - return BBMapObj[BB]->DefBB->AvailableVal; +/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator +/// in the SSAUpdaterImpl template. +namespace { + class PHIiter { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHIiter(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHIiter(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHIiter &operator++() { ++idx; return *this; } + bool operator==(const PHIiter& x) const { return idx == x.idx; } + bool operator!=(const PHIiter& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; } -/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds -/// vector, set Info->NumPreds, and allocate space in Info->Preds. -static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info, - SmallVectorImpl<BasicBlock*> *Preds, - BumpPtrAllocator *Allocator) { - // We can get our predecessor info by walking the pred_iterator list, - // but it is relatively slow. If we already have PHI nodes in this - // block, walk one of them to get the predecessor list instead. - BasicBlock *BB = Info->BB; - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) - Preds->push_back(SomePhi->getIncomingBlock(PI)); - } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Preds->push_back(*PI); +/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, +/// specialized for SSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + typedef PHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); } - Info->NumPreds = Preds->size(); - Info->Preds = static_cast<SSAUpdater::BBInfo**> - (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*), - AlignOf<SSAUpdater::BBInfo*>::Alignment)); -} - -/// BuildBlockList - Starting from the specified basic block, traverse back -/// through its predecessors until reaching blocks with known values. Create -/// BBInfo structures for the blocks and append them to the block list. -void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList, - BumpPtrAllocator *Allocator) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - BBMapTy *BBMap = getBBMap(BM); - SmallVector<BBInfo*, 10> RootList; - SmallVector<BBInfo*, 64> WorkList; - - BBInfo *Info = new (*Allocator) BBInfo(BB, 0); - (*BBMap)[BB] = Info; - WorkList.push_back(Info); - - // Search backward from BB, creating BBInfos along the way and stopping when - // reaching blocks that define the value. Record those defining blocks on - // the RootList. 
- SmallVector<BasicBlock*, 10> Preds; - while (!WorkList.empty()) { - Info = WorkList.pop_back_val(); - Preds.clear(); - FindPredecessorBlocks(Info, &Preds, Allocator); - - // Treat an unreachable predecessor as a definition with 'undef'. - if (Info->NumPreds == 0) { - Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); - Info->DefBB = Info; - RootList.push_back(Info); - continue; - } - - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BasicBlock *Pred = Preds[p]; - // Check if BBMap already has a BBInfo for the predecessor block. - BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); - if (BBMapBucket.second) { - Info->Preds[p] = BBMapBucket.second; - continue; - } - - // Create a new BBInfo for the predecessor. - Value *PredVal = AvailableVals.lookup(Pred); - BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); - BBMapBucket.second = PredInfo; - Info->Preds[p] = PredInfo; - - if (PredInfo->AvailableVal) { - RootList.push_back(PredInfo); - continue; - } - WorkList.push_back(PredInfo); + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); } } - // Now that we know what blocks are backwards-reachable from the starting - // block, do a forward depth-first traversal to assign postorder numbers - // to those blocks. - BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); - unsigned BlkNum = 1; - - // Initialize the worklist with the roots from the backward traversal. - while (!RootList.empty()) { - Info = RootList.pop_back_val(); - Info->IDom = PseudoEntry; - Info->BlkNum = -1; - WorkList.push_back(Info); + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->PrototypeValue->getType()); } - while (!WorkList.empty()) { - Info = WorkList.back(); - - if (Info->BlkNum == -2) { - // All the successors have been handled; assign the postorder number. - Info->BlkNum = BlkNum++; - // If not a root, put it on the BlockList. - if (!Info->AvailableVal) - BlockList->push_back(Info); - WorkList.pop_back(); - continue; - } - - // Leave this entry on the worklist, but set its BlkNum to mark that its - // successors have been put on the worklist. When it returns to the top - // the list, after handling its successors, it will be assigned a number. - Info->BlkNum = -2; - - // Add unvisited successors to the work list. - for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB); - SI != E; ++SI) { - BBInfo *SuccInfo = (*BBMap)[*SI]; - if (!SuccInfo || SuccInfo->BlkNum) - continue; - SuccInfo->BlkNum = -1; - WorkList.push_back(SuccInfo); - } + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. 
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), + Updater->PrototypeValue->getName(), + &BB->front()); + PHI->reserveOperandSpace(NumPreds); + return PHI; } - PseudoEntry->BlkNum = BlkNum; -} -/// IntersectDominators - This is the dataflow lattice "meet" operation for -/// finding dominators. Given two basic blocks, it walks up the dominator -/// tree until it finds a common dominator of both. It uses the postorder -/// number of the blocks to determine how to do that. -static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1, - SSAUpdater::BBInfo *Blk2) { - while (Blk1 != Blk2) { - while (Blk1->BlkNum < Blk2->BlkNum) { - Blk1 = Blk1->IDom; - if (!Blk1) - return Blk2; - } - while (Blk2->BlkNum < Blk1->BlkNum) { - Blk2 = Blk2->IDom; - if (!Blk2) - return Blk1; - } + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); } - return Blk1; -} -/// FindDominators - Calculate the dominator tree for the subset of the CFG -/// corresponding to the basic blocks on the BlockList. This uses the -/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and -/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. -/// Because the CFG subset does not include any edges leading into blocks that -/// define the value, the results are not the usual dominator tree. The CFG -/// subset has a single pseudo-entry node with edges to a set of root nodes -/// for blocks that define the value. The dominators for this subset CFG are -/// not the standard dominators but they are adequate for placing PHIs within -/// the subset CFG. -void SSAUpdater::FindDominators(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // Start with the first predecessor. - assert(Info->NumPreds > 0 && "unreachable block"); - BBInfo *NewIDom = Info->Preds[0]; - - // Iterate through the block's other predecessors. - for (unsigned p = 1; p != Info->NumPreds; ++p) { - BBInfo *Pred = Info->Preds[p]; - NewIDom = IntersectDominators(NewIDom, Pred); - } - - // Check if the IDom value has changed. - if (NewIDom != Info->IDom) { - Info->IDom = NewIDom; - Changed = true; - } - } - } while (Changed); -} - -/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for -/// any blocks containing definitions of the value. If one is found, then the -/// successor of Pred is in the dominance frontier for the definition, and -/// this function returns true. -static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred, - const SSAUpdater::BBInfo *IDom) { - for (; Pred != IDom; Pred = Pred->IDom) { - if (Pred->DefBB == Pred) - return true; + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static PHINode *InstrIsPHI(Instruction *I) { + return dyn_cast<PHINode>(I); } - return false; -} - -/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of -/// the known definitions. Iteratively add PHIs in the dom frontiers until -/// nothing changes. Along the way, keep track of the nearest dominating -/// definitions for non-PHI blocks. 
-void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // If this block already needs a PHI, there is nothing to do here. - if (Info->DefBB == Info) - continue; - - // Default to use the same def as the immediate dominator. - BBInfo *NewDefBB = Info->IDom->DefBB; - for (unsigned p = 0; p != Info->NumPreds; ++p) { - if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { - // Need a PHI here. - NewDefBB = Info; - break; - } - } - - // Check if anything changed. - if (NewDefBB != Info->DefBB) { - Info->DefBB = NewDefBB; - Changed = true; - } - } - } while (Changed); -} - -/// FindAvailableVal - If this block requires a PHI, first check if an existing -/// PHI matches the PHI placement and reaching definitions computed earlier, -/// and if not, create a new PHI. Visit all the block's predecessors to -/// calculate the available value for each one and fill in the incoming values -/// for a new PHI. -void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - // Go through the worklist in forward order (i.e., backward through the CFG) - // and check if existing PHIs can be used. If not, create empty PHIs where - // they are needed. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) { - BBInfo *Info = *I; - // Check if there needs to be a PHI in BB. - if (Info->DefBB != Info) - continue; - - // Look for an existing PHI. - FindExistingPHI(Info->BB, BlockList); - if (Info->AvailableVal) - continue; - - PHINode *PHI = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), - &Info->BB->front()); - PHI->reserveOperandSpace(Info->NumPreds); - Info->AvailableVal = PHI; - AvailableVals[Info->BB] = PHI; + /// ValueIsPHI - Check if a value is a PHI. + /// + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); } - // Now go back through the worklist in reverse order to fill in the arguments - // for any new PHIs added in the forward traversal. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - AvailableVals[Info->BB] = Info->DefBB->AvailableVal; - continue; - } - - // Check if this block contains a newly added PHI. - PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal); - if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds) - continue; - - // Iterate through the block's predecessors. - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BBInfo *PredInfo = Info->Preds[p]; - BasicBlock *Pred = PredInfo->BB; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - PHI->addIncoming(PredInfo->AvailableVal, Pred); - } - - DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); - - // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(PHI); + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return 0; } -} -/// FindExistingPHI - Look through the PHI nodes in a block to see if any of -/// them match what is needed. -void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (CheckIfPHIMatches(SomePHI)) { - RecordMatchingPHI(SomePHI); - break; - } - // Match failed: clear all the PHITag values. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) - (*I)->PHITag = 0; + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; } -} +}; -/// CheckIfPHIMatches - Check if a PHI node matches the placement and values -/// in the BBMap. -bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Mark that the block containing this PHI has been visited. - (*BBMap)[PHI->getParent()]->PHITag = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - Value *IncomingVal = PHI->getIncomingValue(i); - BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)]; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - - // Check if it matches the expected value. - if (PredInfo->AvailableVal) { - if (IncomingVal == PredInfo->AvailableVal) - continue; - return false; - } - - // Check if the value is a PHI in the correct block. - PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); - if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) - return false; - - // If this block has already been visited, check if this PHI matches. - if (PredInfo->PHITag) { - if (IncomingPHIVal == PredInfo->PHITag) - continue; - return false; - } - PredInfo->PHITag = IncomingPHIVal; - - WorkList.push_back(IncomingPHIVal); - } - } - return true; -} +} // End llvm namespace -/// RecordMatchingPHI - For a PHI node that matches, record it and its input -/// PHIs in both the BBMap and the AvailableVals mapping. -void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Record this PHI. - BasicBlock *BB = PHI->getParent(); - AvailableVals[BB] = PHI; - (*BBMap)[BB]->AvailableVal = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); - if (!IncomingPHIVal) continue; - BB = IncomingPHIVal->getParent(); - BBInfo *Info = (*BBMap)[BB]; - if (!Info || Info->AvailableVal) - continue; - - // Record the PHI and add it to the worklist. 
- AvailableVals[BB] = IncomingPHIVal; - Info->AvailableVal = IncomingPHIVal; - WorkList.push_back(IncomingPHIVal); - } - } + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); }
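The SSAUpdater rewrite above moves the PHI-placement algorithm into the shared SSAUpdaterImpl<UpdaterT> template and customizes it purely through the SSAUpdaterTraits specialization shown in the diff. As a rough, self-contained toy of that traits pattern (all names below are hypothetical, not the real LLVM headers):

#include <vector>

// Toy illustration: a generic driver template talks to its client only through
// a Traits specialization, so one algorithm can serve several client types.
template <typename ClientT> struct UpdaterTraits; // specialized per client type

template <typename ClientT> class GenericDriver {
public:
  typedef typename UpdaterTraits<ClientT>::BlockT BlockT;
  typedef typename UpdaterTraits<ClientT>::ValueT ValueT;

  explicit GenericDriver(ClientT *C) : Client(C) {}

  // Stand-in for GetValue(): blocks with no predecessors get the client's
  // "undef" value; otherwise forward the first predecessor's value.
  ValueT valueFor(BlockT *BB) {
    std::vector<BlockT *> Preds;
    UpdaterTraits<ClientT>::findPredecessors(BB, Preds);
    if (Preds.empty())
      return UpdaterTraits<ClientT>::undefValue(Client);
    return Client->availableValue(Preds.front());
  }

private:
  ClientT *Client;
};

// A concrete client and its traits specialization.
struct ToyBlock { std::vector<ToyBlock *> Preds; int Val; };

struct ToyClient {
  int availableValue(ToyBlock *BB) { return BB->Val; }
};

template <> struct UpdaterTraits<ToyClient> {
  typedef ToyBlock BlockT;
  typedef int ValueT;
  static void findPredecessors(ToyBlock *BB, std::vector<ToyBlock *> &Out) {
    Out = BB->Preds;
  }
  static int undefValue(ToyClient *) { return -1; }
};

// Usage: the same driver works for any client with a matching traits specialization.
int valueAtEntry(ToyClient &C, ToyBlock &Entry) {
  GenericDriver<ToyClient> Driver(&C);
  return Driver.valueFor(&Entry);
}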