25 files changed, 1547 insertions, 1069 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 71049fa..135a621 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -19,17 +19,18 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-void ExtAddrMode::print(OStream &OS) const {
+void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
   if (BaseGV) {
     OS << (NeedPlus ? " + " : "")
        << "GV:";
-    WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false);
+    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
     NeedPlus = true;
   }
 
@@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const {
   if (BaseReg) {
     OS << (NeedPlus ? " + " : "")
        << "Base:";
-    WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false);
+    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
     NeedPlus = true;
   }
   if (Scale) {
     OS << (NeedPlus ? " + " : "")
        << Scale << "*";
-    WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false);
+    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
     NeedPlus = true;
   }
 
@@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const {
 }
 
 void ExtAddrMode::dump() const {
-  print(cerr);
-  cerr << '\n';
+  print(errs());
+  errs() << '\n';
 }
 
 
@@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
     if (!RHS) return false;
     int64_t Scale = RHS->getSExtValue();
     if (Opcode == Instruction::Shl)
-      Scale = 1 << Scale;
+      Scale = 1LL << Scale;
     
     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
   }
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6d1180d..4931ab3 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -23,6 +24,8 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ValueHandle.h"
 #include <algorithm>
 using namespace llvm;
@@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
       Value *RetVal = 0;
 
       // Create a value to return... if the function doesn't return null...
-      if (BB->getParent()->getReturnType() != Type::VoidTy)
+      if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))
         RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
 
       // Create the return...
-      NewTI = ReturnInst::Create(RetVal);
+      NewTI = ReturnInst::Create(TI->getContext(), RetVal);
     }
     break;
 
@@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
   case Instruction::Switch:    // Should remove entry
   default:
   case Instruction::Ret:       // Cannot happen, has no successors!
-    assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
-    abort();
+    llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
   }
 
   if (NewTI)   // If it's a different instruction, replace.
@@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
     ++SplitIt;
   BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
 
-  // The new block lives in whichever loop the old one did.
+  // The new block lives in whichever loop the old one did. This preserves
+  // LCSSA as well, because we force the split point to be after any PHI nodes.
   if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
     if (Loop *L = LI->getLoopFor(Old))
       L->addBasicBlockToLoop(New, LI->getBase());
@@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 /// Preds array, which has NumPreds elements in it.  The new block is given a
 /// suffix of 'Suffix'.
 ///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
-/// DominanceFrontier, but no other analyses.
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
 BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
                                          BasicBlock *const *Preds,
                                          unsigned NumPreds, const char *Suffix,
                                          Pass *P) {
   // Create new basic block, insert right before the original block.
-  BasicBlock *NewBB =
-    BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB);
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+                                         BB->getParent(), BB);
   
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
   
+  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+  Loop *L = LI ? LI->getLoopFor(BB) : 0;
+  bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
   // Move the edges from Preds to point to NewBB instead of BB.
-  for (unsigned i = 0; i != NumPreds; ++i)
+  // While here, if we need to preserve loop analyses, collect
+  // some information about how this split will affect loops.
+  bool HasLoopExit = false;
+  bool IsLoopEntry = !!L;
+  bool SplitMakesNewLoopHeader = false;
+  for (unsigned i = 0; i != NumPreds; ++i) {
     Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
-  
+
+    if (LI) {
+      // If we need to preserve LCSSA, determine if any of
+      // the preds is a loop exit.
+      if (PreserveLCSSA)
+        if (Loop *PL = LI->getLoopFor(Preds[i]))
+          if (!PL->contains(BB))
+            HasLoopExit = true;
+      // If we need to preserve LoopInfo, note whether any of the
+      // preds crosses an interesting loop boundary.
+      if (L) {
+        if (L->contains(Preds[i]))
+          IsLoopEntry = false;
+        else
+          SplitMakesNewLoopHeader = true;
+      }
+    }
+  }
+
   // Update dominator tree and dominator frontier if available.
   DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
   if (DT)
     DT->splitBlock(NewBB);
   if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
     DF->splitBlock(NewBB);
-  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
-  
-  
+
   // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
   // node becomes an incoming value for BB's phi node.  However, if the Preds
   // list is empty, we need to insert dummy entries into the PHI nodes in BB to
@@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
       cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
     return NewBB;
   }
+
+  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+  if (L) {
+    if (IsLoopEntry) {
+      if (Loop *PredLoop = LI->getLoopFor(Preds[0])) {
+        // Add the new block to the nearest enclosing loop (and not an
+        // adjacent loop).
+        while (PredLoop && !PredLoop->contains(BB))
+          PredLoop = PredLoop->getParentLoop();
+        if (PredLoop)
+          PredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+      }
+    } else {
+      L->addBasicBlockToLoop(NewBB, LI->getBase());
+      if (SplitMakesNewLoopHeader)
+        L->moveToHeader(NewBB);
+    }
+  }
   
   // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
   for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I++);
     
     // Check to see if all of the values coming in are the same.  If so, we
-    // don't need to create a new PHI node.
-    Value *InVal = PN->getIncomingValueForBlock(Preds[0]);
-    for (unsigned i = 1; i != NumPreds; ++i)
-      if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
-        InVal = 0;
-        break;
-      }
-    
+    // don't need to create a new PHI node, unless it's needed for LCSSA.
+    Value *InVal = 0;
+    if (!HasLoopExit) {
+      InVal = PN->getIncomingValueForBlock(Preds[0]);
+      for (unsigned i = 1; i != NumPreds; ++i)
+        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+          InVal = 0;
+          break;
+        }
+    }
+
     if (InVal) {
       // If all incoming values for the new PHI would be the same, just don't
       // make a new PHI.  Instead, just remove the incoming values from the old
@@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     // Add an incoming value to the PHI node in the loop for the preheader
     // edge.
     PN->addIncoming(InVal, NewBB);
-    
-    // Check to see if we can eliminate this phi node.
-    if (Value *V = PN->hasConstantValue(DT != 0)) {
-      Instruction *I = dyn_cast<Instruction>(V);
-      if (!I || DT == 0 || DT->dominates(I, PN)) {
-        PN->replaceAllUsesWith(V);
-        if (AA) AA->deleteValue(PN);
-        PN->eraseFromParent();
-      }
-    }
   }
   
   return NewBB;
@@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
   // Test if the values are trivially equivalent.
   if (A == B) return true;
   
-  // Test if the values come form identical arithmetic instructions.
+  // Test if the values come from identical arithmetic instructions.
+  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+  // this function is only used when one address use dominates the
+  // other, which means that they'll always either have the same
+  // value or one of them will have an undefined value.
   if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
       isa<PHINode>(A) || isa<GetElementPtrInst>(A))
     if (const Instruction *BI = dyn_cast<Instruction>(B))
-      if (cast<Instruction>(A)->isIdenticalTo(BI))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
   
   // Otherwise they may not be equivalent.
@@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   unsigned AccessSize = 0;
   if (AA) {
     const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
-    AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy);
+    AccessSize = AA->getTypeStoreSize(AccessTy);
   }
   
   while (ScanFrom != ScanBB->begin()) {
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index 1650cfa..4b720b1 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "basicinliner"
-
 #include "llvm/Module.h"
 #include "llvm/Function.h"
 #include "llvm/Transforms/Utils/BasicInliner.h"
@@ -21,6 +20,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <vector>
 
@@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {
       }
   }
   
-  DOUT << ": " << CallSites.size() << " call sites.\n";
+  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
   
   // Inline call sites.
   bool Changed = false;
@@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() {
         }
         InlineCost IC = CA.getInlineCost(CS, NeverInline);
         if (IC.isAlways()) {        
-          DOUT << "  Inlining: cost=always"
-               <<", call: " << *CS.getInstruction();
+          DEBUG(errs() << "  Inlining: cost=always"
+                       <<", call: " << *CS.getInstruction());
         } else if (IC.isNever()) {
-          DOUT << "  NOT Inlining: cost=never"
-               <<", call: " << *CS.getInstruction();
+          DEBUG(errs() << "  NOT Inlining: cost=never"
+                       <<", call: " << *CS.getInstruction());
           continue;
         } else {
           int Cost = IC.getValue();
           
           if (Cost >= (int) BasicInlineThreshold) {
-            DOUT << "  NOT Inlining: cost = " << Cost
-                 << ", call: " <<  *CS.getInstruction();
+            DEBUG(errs() << "  NOT Inlining: cost = " << Cost
+                         << ", call: " <<  *CS.getInstruction());
             continue;
           } else {
-            DOUT << "  Inlining: cost = " << Cost
-                 << ", call: " <<  *CS.getInstruction();
+            DEBUG(errs() << "  Inlining: cost = " << Cost
+                         << ", call: " <<  *CS.getInstruction());
           }
         }
         
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c4fd1ea..849b2b5 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -21,11 +21,13 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -43,6 +45,7 @@ namespace {
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<LoopInfo>();
+      AU.addPreserved<ProfileInfo>();
 
       // No loop canonicalization guarantees are broken by this pass.
       AU.addPreservedID(LoopSimplifyID);
@@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
   return false;
 }
 
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed.  Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+                                       BasicBlock *SplitBB,
+                                       BasicBlock *DestBB) {
+  // SplitBB shouldn't have anything non-trivial in it yet.
+  assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+         "SplitBB has non-PHI nodes!");
+
+  // For each PHI in the destination block...
+  for (BasicBlock::iterator I = DestBB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+    Value *V = PN->getIncomingValue(Idx);
+    // If the input is a PHI which already satisfies LCSSA, don't create
+    // a new one.
+    if (const PHINode *VP = dyn_cast<PHINode>(V))
+      if (VP->getParent() == SplitBB)
+        continue;
+    // Otherwise a new PHI is needed. Create one and populate it.
+    PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+                                     SplitBB->getTerminator());
+    for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+      NewPN->addIncoming(V, Preds[i]);
+    // Update the original PHI.
+    PN->setIncomingValue(Idx, NewPN);
+  }
+}
+
 /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
 /// split the critical edge.  This will update DominatorTree and
 /// DominatorFrontier  information if it is available, thus calling this pass
@@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
 /// false otherwise.  This ensures that all edges to that dest go to one block
 /// instead of each going to a different block.
 //
-bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
-                             bool MergeIdenticalEdges) {
-  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false;
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+                                    Pass *P, bool MergeIdenticalEdges) {
+  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
   BasicBlock *TIBB = TI->getParent();
   BasicBlock *DestBB = TI->getSuccessor(SuccNum);
 
   // Create a new basic block, linking it into the CFG.
-  BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." +
-                                         DestBB->getName() + "_crit_edge");
+  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
   // Create our unconditional branch...
   BranchInst::Create(DestBB, NewBB);
 
@@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
   
 
   // If we don't have a pass object, we can't update anything...
-  if (P == 0) return true;
+  if (P == 0) return NewBB;
 
   // Now update analysis information.  Since the only predecessor of NewBB is
   // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
@@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
     // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
     if (!OtherPreds.empty()) {
       // FIXME: IMPLEMENT THIS!
-      assert(0 && "Requiring domfrontiers but not idom/domtree/domset."
-             " not implemented yet!");
+      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
+                       " not implemented yet!");
     }
     
     // Since the new block is dominated by its only predecessor TIBB,
@@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
   
   // Update LoopInfo if it is around.
   if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) {
-    // If one or the other blocks were not in a loop, the new block is not
-    // either, and thus LI doesn't need to be updated.
-    if (Loop *TIL = LI->getLoopFor(TIBB))
+    if (Loop *TIL = LI->getLoopFor(TIBB)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
       if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
         if (TIL == DestLoop) {
           // Both in the same loop, the NewBB joins loop.
@@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
             P->addBasicBlockToLoop(NewBB, LI->getBase());
         }
       }
+      // If TIBB is in a loop and DestBB is outside of that loop, split the
+      // other exit blocks of the loop that also have predecessors outside
+      // the loop, to maintain a LoopSimplify guarantee.
+      if (!TIL->contains(DestBB) &&
+          P->mustPreserveAnalysisID(LoopSimplifyID)) {
+        assert(!TIL->contains(NewBB) &&
+               "Split point for loop exit is contained in loop!");
+
+        // Update LCSSA form in the newly created exit block.
+        if (P->mustPreserveAnalysisID(LCSSAID)) {
+          SmallVector<BasicBlock *, 1> OrigPred;
+          OrigPred.push_back(TIBB);
+          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+        }
+
+        // For each unique exit block...
+        SmallVector<BasicBlock *, 4> ExitBlocks;
+        TIL->getExitBlocks(ExitBlocks);
+        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+          // Collect all the preds that are inside the loop, and note
+          // whether there are any preds outside the loop.
+          SmallVector<BasicBlock *, 4> Preds;
+          bool HasPredOutsideOfLoop = false;
+          BasicBlock *Exit = ExitBlocks[i];
+          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+               I != E; ++I)
+            if (TIL->contains(*I))
+              Preds.push_back(*I);
+            else
+              HasPredOutsideOfLoop = true;
+          // If there are any preds not in the loop, we'll need to split
+          // the edges. The Preds.empty() check is needed because a block
+          // may appear multiple times in the list. We can't use
+          // getUniqueExitBlocks above because that depends on LoopSimplify
+          // form, which we're in the process of restoring!
+          if (!Preds.empty() && HasPredOutsideOfLoop) {
+            BasicBlock *NewExitBB =
+              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+                                     "split", P);
+            if (P->mustPreserveAnalysisID(LCSSAID))
+              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+          }
+        }
+      }
+      // LCSSA form was updated above for the case where LoopSimplify is
+      // available, which means that all predecessors of loop exit blocks
+      // are within the loop. Without LoopSimplify form, it would be
+      // necessary to insert a new phi.
+      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+             "SplitCriticalEdge doesn't know how to update LCCSA form "
+             "without LoopSimplify!");
+    }
   }
-  return true;
+
+  // Update ProfileInfo if it is around.
+  if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) {
+    PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges);
+  }
+
+  return NewBB;
 }
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 10cae5c..f4394ea 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils
   CloneFunction.cpp
   CloneLoop.cpp
   CloneModule.cpp
-  CloneTrace.cpp
   CodeExtractor.cpp
   DemoteRegToStack.cpp
-  InlineCost.cpp
   InlineFunction.cpp
+  InstructionNamer.cpp
   LCSSA.cpp
   Local.cpp
   LoopSimplify.cpp
@@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils
   LowerSwitch.cpp
   Mem2Reg.cpp
   PromoteMemoryToRegister.cpp
-  SimplifyCFG.cpp
+  SSAUpdater.cpp
   SSI.cpp
+  SimplifyCFG.cpp
   UnifyFunctionExitNodes.cpp
   UnrollLoop.cpp
   ValueMapper.cpp
-  InstructionNamer.cpp
   )
 
 target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d0fdefa..30130fa 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
                                   DenseMap<const Value*, Value*> &ValueMap,
                                   const char *NameSuffix, Function *F,
                                   ClonedCodeInfo *CodeInfo) {
-  BasicBlock *NewBB = BasicBlock::Create("", F);
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
   bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
 //
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              DenseMap<const Value*, Value*> &ValueMap,
-                             std::vector<ReturnInst*> &Returns,
+                             SmallVectorImpl<ReturnInst*> &Returns,
                              const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
   assert(NameSuffix && "NameSuffix cannot be null!");
 
@@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F,
       ValueMap[I] = DestI++;        // Add mapping to ValueMap
     }
 
-  std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+  SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
   CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
   return NewF;
 }
@@ -179,7 +180,7 @@ namespace {
     Function *NewFunc;
     const Function *OldFunc;
     DenseMap<const Value*, Value*> &ValueMap;
-    std::vector<ReturnInst*> &Returns;
+    SmallVectorImpl<ReturnInst*> &Returns;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
     const TargetData *TD;
@@ -187,7 +188,7 @@ namespace {
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
                           DenseMap<const Value*, Value*> &valueMap,
-                          std::vector<ReturnInst*> &returns,
+                          SmallVectorImpl<ReturnInst*> &returns,
                           const char *nameSuffix, 
                           ClonedCodeInfo *codeInfo,
                           const TargetData *td)
@@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   
   // Nope, clone it now.
   BasicBlock *NewBB;
-  BBEntry = NewBB = BasicBlock::Create();
+  BBEntry = NewBB = BasicBlock::Create(BB->getContext());
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
   bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
     if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
       if (DbgFnStart == NULL) {
-        DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram()));
+        DISubprogram SP(DFSI->getSubprogram());
         if (SP.describes(BB->getParent()))
           DbgFnStart = DFSI->getSubprogram();
       }
@@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
 /// mapping its operands through ValueMap if they are available.
 Constant *PruningFunctionCloner::
 ConstantFoldMappedInstruction(const Instruction *I) {
+  LLVMContext &Context = I->getContext();
+  
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           ValueMap)))
+                                                           ValueMap,
+                                                           Context)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                           &Ops[0], Ops.size(), TD);
+                                           &Ops[0], Ops.size(), 
+                                           Context, TD);
 
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
                                                           CE);
 
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
-                                  Ops.size(), TD);
+                                  Ops.size(), Context, TD);
 }
 
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) {
 /// used for things like CloneFunction or CloneModule.
 void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                      DenseMap<const Value*, Value*> &ValueMap,
-                                     std::vector<ReturnInst*> &Returns,
+                                     SmallVectorImpl<ReturnInst*> &Returns,
                                      const char *NameSuffix, 
                                      ClonedCodeInfo *CodeInfo,
                                      const TargetData *TD) {
   assert(NameSuffix && "NameSuffix cannot be null!");
+  LLVMContext &Context = OldFunc->getContext();
   
 #ifndef NDEBUG
   for (Function::const_arg_iterator II = OldFunc->arg_begin(), 
@@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // insert it into the new function in the right order.  If not, ignore it.
   //
   // Defer PHI resolution until rest of function is resolved.
-  std::vector<const PHINode*> PHIToResolve;
+  SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
     BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
@@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
         if (BasicBlock *MappedBlock = 
             cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
-          Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap);
+          Value *InVal = MapValue(PN->getIncomingValue(pred),
+                                  ValueMap, Context);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 82f5b93..0285f8c 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M,
   //
   for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
        I != E; ++I) {
-    GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(),
+    GlobalVariable *GV = new GlobalVariable(*New, 
+                                            I->getType()->getElementType(),
                                             false,
                                             GlobalValue::ExternalLinkage, 0,
-                                            I->getName(), New);
+                                            I->getName());
     GV->setAlignment(I->getAlignment());
     ValueMap[I] = GV;
   }
@@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,
     GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 ValueMap)));
+                                                 ValueMap,
+                                                 M->getContext())));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M,
         ValueMap[J] = DestI++;
       }
 
-      std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+      SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
       CloneFunctionInto(F, I, ValueMap, Returns);
     }
 
@@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext())));
   }
   
   return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 6d5904e..c39ccf7 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -18,6 +18,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
@@ -27,6 +28,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
 #include <algorithm>
 #include <set>
@@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
 void CodeExtractor::splitReturnBlocks() {
   for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
          E = BlocksToExtract.end(); I != E; ++I)
-    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
-      (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+      BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+      if (DT) {
+        // Old dominates New. New node domiantes all other nodes dominated
+        //by Old.
+        DomTreeNode *OldNode = DT->getNode(*I);
+        SmallVector<DomTreeNode*, 8> Children;
+        for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+             DI != DE; ++DI) 
+          Children.push_back(*DI);
+
+        DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+        for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+               E = Children.end(); I != E; ++I) 
+          DT->changeImmediateDominator(*I, NewNode);
+      }
+    }
 }
 
 // findInputsOutputs - Find inputs to, outputs from the code region.
@@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
                                            BasicBlock *newHeader,
                                            Function *oldFunction,
                                            Module *M) {
-  DOUT << "inputs: " << inputs.size() << "\n";
-  DOUT << "outputs: " << outputs.size() << "\n";
+  DEBUG(errs() << "inputs: " << inputs.size() << "\n");
+  DEBUG(errs() << "outputs: " << outputs.size() << "\n");
 
   // This function returns unsigned, outputs will go back by reference.
   switch (NumExitBlocks) {
   case 0:
-  case 1: RetTy = Type::VoidTy; break;
-  case 2: RetTy = Type::Int1Ty; break;
-  default: RetTy = Type::Int16Ty; break;
+  case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+  case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+  default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
   std::vector<const Type*> paramTy;
@@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   for (Values::const_iterator i = inputs.begin(),
          e = inputs.end(); i != e; ++i) {
     const Value *value = *i;
-    DOUT << "value used in func: " << *value << "\n";
+    DEBUG(errs() << "value used in func: " << *value << "\n");
     paramTy.push_back(value->getType());
   }
 
   // Add the types of the output values to the function's argument list.
   for (Values::const_iterator I = outputs.begin(), E = outputs.end();
        I != E; ++I) {
-    DOUT << "instr used in func: " << **I << "\n";
+    DEBUG(errs() << "instr used in func: " << **I << "\n");
     if (AggregateArgs)
       paramTy.push_back((*I)->getType());
     else
       paramTy.push_back(PointerType::getUnqual((*I)->getType()));
   }
 
-  DOUT << "Function type: " << *RetTy << " f(";
+  DEBUG(errs() << "Function type: " << *RetTy << " f(");
   for (std::vector<const Type*>::iterator i = paramTy.begin(),
          e = paramTy.end(); i != e; ++i)
-    DOUT << **i << ", ";
-  DOUT << ")\n";
+    DEBUG(errs() << **i << ", ");
+  DEBUG(errs() << ")\n");
 
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy));
+    PointerType *StructPtr =
+           PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
     paramTy.clear();
     paramTy.push_back(StructPtr);
   }
-  const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+  const FunctionType *funcType =
+                  FunctionType::get(RetTy, paramTy, false);
 
   // Create the new function
   Function *newFunction = Function::Create(funcType,
@@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
     Value *RewriteVal;
     if (AggregateArgs) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
-      std::string GEPname = "gep_" + inputs[i]->getName();
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
       TerminatorInst *TI = newFunction->begin()->getTerminator();
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2, 
-                                                         GEPname, TI);
-      RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+      GetElementPtrInst *GEP = 
+        GetElementPtrInst::Create(AI, Idx, Idx+2, 
+                                  "gep_" + inputs[i]->getName(), TI);
+      RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
     } else
       RewriteVal = AI++;
 
@@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   return newFunction;
 }
 
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+  for (Value::use_iterator UI = Used->use_begin(),
+       UE = Used->use_end(); UI != UE; ++UI) {
+     PHINode *P = dyn_cast<PHINode>(*UI);
+     if (P && P->getParent() == BB)
+       return P->getIncomingBlock(UI);
+  }
+  
+  return 0;
+}
+
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
@@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                            Values &inputs, Values &outputs) {
   // Emit a call to the new function, passing in: *pointer to struct (if
   // aggregating parameters), or plan inputs and allocated memory for outputs
-  std::vector<Value*> params, StructValues, ReloadOutputs;
+  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+  
+  LLVMContext &Context = newFunction->getContext();
 
   // Add inputs as params, or to be filled into the struct
   for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
@@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
       ArgTypes.push_back((*v)->getType());
 
     // Allocate a struct at the beginning of this function
-    Type *StructArgTy = StructType::get(ArgTypes);
+    Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
     Struct =
       new AllocaInst(StructArgTy, 0, "structArg",
                      codeReplacer->getParent()->begin()->begin());
@@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
     for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
       GetElementPtrInst *GEP =
         GetElementPtrInst::Create(Struct, Idx, Idx + 2,
                                   "gep_" + StructValues[i]->getName());
@@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     Value *Output = 0;
     if (AggregateArgs) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
       GetElementPtrInst *GEP
         = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
                                     "gep_reload_" + outputs[i]->getName());
@@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
       Output = ReloadOutputs[i];
     }
     LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    Reloads.push_back(load);
     codeReplacer->getInstList().push_back(load);
     std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
     for (unsigned u = 0, e = Users.size(); u != e; ++u) {
@@ -434,7 +472,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
   // Now we can emit a switch statement using the call as a value.
   SwitchInst *TheSwitch =
-      SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty),
+      SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
                          codeReplacer, 0, codeReplacer);
 
   // Since there may be multiple exits from the original region, make the new
@@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
         if (!NewTarget) {
           // If we don't already have an exit stub for this non-extracted
           // destination, create one now!
-          NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub",
+          NewTarget = BasicBlock::Create(Context,
+                                         OldTarget->getName() + ".exitStub",
                                          newFunction);
           unsigned SuccNum = switchVal++;
 
@@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
           case 0:
           case 1: break;  // No value needed.
           case 2:         // Conditional branch, return a bool
-            brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+            brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
             break;
           default:
-            brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+            brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
             break;
           }
 
-          ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget);
+          ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
 
           // Update the switch instruction.
-          TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+          TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+                                              SuccNum),
                              OldTarget);
 
           // Restore values just before we exit
@@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                 DominatesDef = false;
             }
 
-            if (DT)
+            if (DT) {
               DominatesDef = DT->dominates(DefBlock, OldTarget);
+              
+              // If the output value is used by a phi in the target block,
+              // then we need to test for dominance of the phi's predecessor
+              // instead.  Unfortunately, this a little complicated since we
+              // have already rewritten uses of the value to uses of the reload.
+              BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], 
+                                                          OldTarget);
+              if (pred && DT && DT->dominates(DefBlock, pred))
+                DominatesDef = true;
+            }
 
             if (DominatesDef) {
               if (AggregateArgs) {
                 Value *Idx[2];
-                Idx[0] = Constant::getNullValue(Type::Int32Ty);
-                Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+                Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+                Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+                                          FirstOut+out);
                 GetElementPtrInst *GEP =
                   GetElementPtrInst::Create(OAI, Idx, Idx + 2,
                                             "gep_" + outputs[out]->getName(),
@@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     // this should be rewritten as a `ret'
 
     // Check if the function should return a value
-    if (OldFnRetTy == Type::VoidTy) {
-      ReturnInst::Create(0, TheSwitch);  // Return void
+    if (OldFnRetTy == Type::getVoidTy(Context)) {
+      ReturnInst::Create(Context, 0, TheSwitch);  // Return void
     } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
       // return what we have
-      ReturnInst::Create(TheSwitch->getCondition(), TheSwitch);
+      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
     } else {
       // Otherwise we must have code extracted an unwind or something, just
       // return whatever we want.
-      ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+      ReturnInst::Create(Context, 
+                         Constant::getNullValue(OldFnRetTy), TheSwitch);
     }
 
     TheSwitch->eraseFromParent();
@@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   Function *oldFunction = header->getParent();
 
   // This takes place of the original loop
-  BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), 
+                                                "codeRepl", oldFunction,
                                                 header);
 
   // The new function needs a root node because other nodes can branch to the
   // head of the region, but the entry node of a function cannot have preds.
-  BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), 
+                                               "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
   // Find inputs to, outputs from the code region.
@@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   //  cerr << "OLD FUNCTION: " << *oldFunction;
   //  verifyFunction(*oldFunction);
 
-  DEBUG(if (verifyFunction(*newFunction)) abort());
+  DEBUG(if (verifyFunction(*newFunction)) 
+        llvm_report_error("verifyFunction failed!"));
   return newFunction;
 }
 
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index b8dd754..c908b4a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(I.getType(), 0,
+                          I.getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = I.getParent()->getParent();
     Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
@@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(P->getType(), 0,
+                          P->getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = P->getParent()->getParent();
     Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 4989c00..0d00d69 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
@@ -28,13 +29,73 @@
 #include "llvm/Support/CallSite.h"
 using namespace llvm;
 
-bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(CI), CG, TD);
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);
 }
-bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(II), CG, TD);
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(II), CG, TD, StaticAllocas);
 }
 
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes.  This function analyze BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+                                                   BasicBlock *InvokeDest,
+                           const SmallVectorImpl<Value*> &InvokeDestPHIValues) {
+  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+    Instruction *I = BBI++;
+    
+    // We only need to check for function calls: inlined invoke
+    // instructions require no special handling.
+    CallInst *CI = dyn_cast<CallInst>(I);
+    if (CI == 0) continue;
+    
+    // If this call cannot unwind, don't convert it to an invoke.
+    if (CI->doesNotThrow())
+      continue;
+    
+    // Convert this function call into an invoke instruction.
+    // First, split the basic block.
+    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+    
+    // Next, create the new invoke instruction, inserting it at the end
+    // of the old basic block.
+    SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+    InvokeInst *II =
+      InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+                         InvokeArgs.begin(), InvokeArgs.end(),
+                         CI->getName(), BB->getTerminator());
+    II->setCallingConv(CI->getCallingConv());
+    II->setAttributes(CI->getAttributes());
+    
+    // Make sure that anything using the call now uses the invoke!  This also
+    // updates the CallGraph if present.
+    CI->replaceAllUsesWith(II);
+    
+    // Delete the unconditional branch inserted by splitBasicBlock
+    BB->getInstList().pop_back();
+    Split->getInstList().pop_front();  // Delete the original call
+    
+    // Update any PHI nodes in the exceptional block to indicate that
+    // there is now a new entry in them.
+    unsigned i = 0;
+    for (BasicBlock::iterator I = InvokeDest->begin();
+         isa<PHINode>(I); ++I, ++i)
+      cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB);
+    
+    // This basic block is now complete, the caller will continue scanning the
+    // next one.
+    return;
+  }
+}
+  
+
 /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
 /// in the body of the inlined function into invokes and turn unwind
 /// instructions into branches to the invoke unwind dest.
@@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
 /// block of the inlined code (the last block is the end of the function),
 /// and InlineCodeInfo is information about the code that got inlined.
 static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
-                                ClonedCodeInfo &InlinedCodeInfo,
-                                CallGraph *CG) {
+                                ClonedCodeInfo &InlinedCodeInfo) {
   BasicBlock *InvokeDest = II->getUnwindDest();
-  std::vector<Value*> InvokeDestPHIValues;
+  SmallVector<Value*, 8> InvokeDestPHIValues;
 
   // If there are PHI nodes in the unwind destination block, we need to
   // keep track of which values came into them from this invoke, then remove
@@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.
-  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
-    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
-         BB != E; ++BB) {
-      if (InlinedCodeInfo.ContainsCalls) {
-        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
-          Instruction *I = BBI++;
-
-          // We only need to check for function calls: inlined invoke
-          // instructions require no special handling.
-          if (!isa<CallInst>(I)) continue;
-          CallInst *CI = cast<CallInst>(I);
-
-          // If this call cannot unwind, don't convert it to an invoke.
-          if (CI->doesNotThrow())
-            continue;
-
-          // Convert this function call into an invoke instruction.
-          // First, split the basic block.
-          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
-
-          // Next, create the new invoke instruction, inserting it at the end
-          // of the old basic block.
-          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
-          InvokeInst *II =
-            InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
-                               InvokeArgs.begin(), InvokeArgs.end(),
-                               CI->getName(), BB->getTerminator());
-          II->setCallingConv(CI->getCallingConv());
-          II->setAttributes(CI->getAttributes());
-
-          // Make sure that anything using the call now uses the invoke!
-          CI->replaceAllUsesWith(II);
-
-          // Update the callgraph.
-          if (CG) {
-            // We should be able to do this:
-            //   (*CG)[Caller]->replaceCallSite(CI, II);
-            // but that fails if the old call site isn't in the call graph,
-            // which, because of LLVM bug 3601, it sometimes isn't.
-            CallGraphNode *CGN = (*CG)[Caller];
-            for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end();
-                 NI != NE; ++NI) {
-              if (NI->first == CI) {
-                NI->first = II;
-                break;
-              }
-            }
-          }
-
-          // Delete the unconditional branch inserted by splitBasicBlock
-          BB->getInstList().pop_back();
-          Split->getInstList().pop_front();  // Delete the original call
-
-          // Update any PHI nodes in the exceptional block to indicate that
-          // there is now a new entry in them.
-          unsigned i = 0;
-          for (BasicBlock::iterator I = InvokeDest->begin();
-               isa<PHINode>(I); ++I, ++i) {
-            PHINode *PN = cast<PHINode>(I);
-            PN->addIncoming(InvokeDestPHIValues[i], BB);
-          }
-
-          // This basic block is now complete, start scanning the next one.
-          break;
-        }
-      }
-
-      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        // An UnwindInst requires special handling when it gets inlined into an
-        // invoke site.  Once this happens, we know that the unwind would cause
-        // a control transfer to the invoke exception destination, so we can
-        // transform it into a direct branch to the exception destination.
-        BranchInst::Create(InvokeDest, UI);
-
-        // Delete the unwind instruction!
-        UI->eraseFromParent();
-
-        // Update any PHI nodes in the exceptional block to indicate that
-        // there is now a new entry in them.
-        unsigned i = 0;
-        for (BasicBlock::iterator I = InvokeDest->begin();
-             isa<PHINode>(I); ++I, ++i) {
-          PHINode *PN = cast<PHINode>(I);
-          PN->addIncoming(InvokeDestPHIValues[i], BB);
-        }
+  // rewrite.  If the code doesn't have calls or unwinds, we know there is
+  // nothing to rewrite.
+  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+    // Now that everything is happy, we have one final detail.  The PHI nodes in
+    // the exception destination block still have entries due to the original
+    // invoke instruction.  Eliminate these entries (which might even delete the
+    // PHI node) now.
+    InvokeDest->removePredecessor(II->getParent());
+    return;
+  }
+  
+  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+    if (InlinedCodeInfo.ContainsCalls)
+      HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest,
+                                             InvokeDestPHIValues);
+
+    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      // An UnwindInst requires special handling when it gets inlined into an
+      // invoke site.  Once this happens, we know that the unwind would cause
+      // a control transfer to the invoke exception destination, so we can
+      // transform it into a direct branch to the exception destination.
+      BranchInst::Create(InvokeDest, UI);
+
+      // Delete the unwind instruction!
+      UI->eraseFromParent();
+
+      // Update any PHI nodes in the exceptional block to indicate that
+      // there is now a new entry in them.
+      unsigned i = 0;
+      for (BasicBlock::iterator I = InvokeDest->begin();
+           isa<PHINode>(I); ++I, ++i) {
+        PHINode *PN = cast<PHINode>(I);
+        PN->addIncoming(InvokeDestPHIValues[i], BB);
       }
     }
   }
@@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   }
 
   for (; I != E; ++I) {
-    const Instruction *OrigCall = I->first.getInstruction();
+    const Value *OrigCall = I->first;
 
     DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
-    if (VMI != ValueMap.end() && VMI->second) {
-      // If the call was inlined, but then constant folded, there is no edge to
-      // add.  Check for this case.
-      if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
-        CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
-    }
+    if (VMI == ValueMap.end() || VMI->second == 0)
+      continue;
+    
+    // If the call was inlined, but then constant folded, there is no edge to
+    // add.  Check for this case.
+    if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+      CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
   }
+  
   // Update the call graph by deleting the edge from Callee to Caller.  We must
   // do this after the loop above in case Caller and Callee are the same.
   CallerNode->removeCallEdgeFor(CS);
@@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
 /// findFnRegionEndMarker - This is a utility routine that is used by
 /// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
 /// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
+///
 static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 
-  GlobalVariable *FnStart = NULL;
+  MDNode *FnStart = NULL;
   const DbgRegionEndInst *FnEnd = NULL;
   for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) 
     for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
          ++BI) {
       if (FnStart == NULL)  {
         if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
-          DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
+          DISubprogram SP(FSI->getSubprogram());
           assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
           if (SP.describes(F))
-            FnStart = SP.getGV();
+            FnStart = SP.getNode();
         }
-      } else {
-        if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
-          if (REI->getContext() == FnStart)
-            FnEnd = REI;
+        continue;
       }
+      
+      if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
+        if (REI->getContext() == FnStart)
+          FnEnd = REI;
     }
   return FnEnd;
 }
@@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 // exists in the instruction stream.  Similiarly this will inline a recursive
 // function by one level.
 //
-bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
   Instruction *TheCall = CS.getInstruction();
+  LLVMContext &Context = TheCall->getContext();
   assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
          "Instruction not in function!");
 
@@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
 
   // Make sure to capture all of the return instructions from the cloned
   // function.
-  std::vector<ReturnInst*> Returns;
+  SmallVector<ReturnInst*, 8> Returns;
   ClonedCodeInfo InlinedFunctionInfo;
   Function::iterator FirstNewBlock;
 
@@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
       if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
           !CalledFunc->onlyReadsMemory()) {
         const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+        const Type *VoidPtrTy = 
+            Type::getInt8PtrTy(Context);
 
         // Create the alloca.  If we have TargetData, use nice alignment.
         unsigned Align = 1;
         if (TD) Align = TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
-                                          Caller->begin()->begin());
+        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, 
+                                          I->getName(), 
+                                          &*Caller->begin()->begin());
         // Emit a memcpy.
-        const Type *Tys[] = { Type::Int64Ty };
+        const Type *Tys[] = { Type::getInt64Ty(Context) };
         Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                        Intrinsic::memcpy, 
                                                        Tys, 1);
@@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
         if (TD == 0)
           Size = ConstantExpr::getSizeOf(AggTy);
         else
-          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+          Size = ConstantInt::get(Type::getInt64Ty(Context),
+                                         TD->getTypeStoreSize(AggTy));
 
         // Always generate a memcpy of alignment 1 here because we don't know
         // the alignment of the src pointer.  Other optimizations can infer
         // better alignment.
         Value *CallArgs[] = {
-          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+          DestCast, SrcCast, Size,
+          ConstantInt::get(Type::getInt32Ty(Context), 1)
         };
         CallInst *TheMemCpy =
           CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
@@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
     // call site. The function body cloner does not clone original
     // region end marker from the CalledFunc. This will ensure that
     // inlined function's scope ends at the right place. 
-    const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc);
-    if (DREI) {
-      for (BasicBlock::iterator BI = TheCall, 
-             BE = TheCall->getParent()->end(); BI != BE; ++BI) {
+    if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) {
+      for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end();
+           BI != BE; ++BI) {
         if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
           if (DbgRegionEndInst *NewDREI = 
-              dyn_cast<DbgRegionEndInst>(DREI->clone()))
+                dyn_cast<DbgRegionEndInst>(DREI->clone()))
             NewDREI->insertAfter(DSPI);
           break;
         }
@@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
   {
     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
     for (BasicBlock::iterator I = FirstNewBlock->begin(),
-           E = FirstNewBlock->end(); I != E; )
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
-        // If the alloca is now dead, remove it.  This often occurs due to code
-        // specialization.
-        if (AI->use_empty()) {
-          AI->eraseFromParent();
-          continue;
-        }
+         E = FirstNewBlock->end(); I != E; ) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+      if (AI == 0) continue;
+      
+      // If the alloca is now dead, remove it.  This often occurs due to code
+      // specialization.
+      if (AI->use_empty()) {
+        AI->eraseFromParent();
+        continue;
+      }
 
-        if (isa<Constant>(AI->getArraySize())) {
-          // Scan for the block of allocas that we can move over, and move them
-          // all at once.
-          while (isa<AllocaInst>(I) &&
-                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
-            ++I;
-
-          // Transfer all of the allocas over in a block.  Using splice means
-          // that the instructions aren't removed from the symbol table, then
-          // reinserted.
-          Caller->getEntryBlock().getInstList().splice(
-              InsertPoint,
-              FirstNewBlock->getInstList(),
-              AI, I);
-        }
+      if (!isa<Constant>(AI->getArraySize()))
+        continue;
+      
+      // Keep track of the static allocas that we inline into the caller if the
+      // StaticAllocas pointer is non-null.
+      if (StaticAllocas) StaticAllocas->push_back(AI);
+      
+      // Scan for the block of allocas that we can move over, and move them
+      // all at once.
+      while (isa<AllocaInst>(I) &&
+             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+        if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I));
+        ++I;
       }
+
+      // Transfer all of the allocas over in a block.  Using splice means
+      // that the instructions aren't removed from the symbol table, then
+      // reinserted.
+      Caller->getEntryBlock().getInstList().splice(InsertPoint,
+                                                   FirstNewBlock->getInstList(),
+                                                   AI, I);
+    }
   }
 
   // If the inlined code contained dynamic alloca instructions, wrap the inlined
@@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
          BB != E; ++BB) {
       TerminatorInst *Term = BB->getTerminator();
       if (isa<UnwindInst>(Term)) {
-        new UnreachableInst(Term);
+        new UnreachableInst(Context, Term);
         BB->getInstList().erase(Term);
       }
     }
@@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
   // any inlined 'unwind' instructions into branches to the invoke exception
   // destination, and call instructions into invoke instructions.
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG);
+    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
 
   // If we cloned in _exactly one_ basic block, and if that block ends in a
   // return instruction, we splice the body of the inlined callee directly into
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 4f8a160..1fa51a3 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -32,7 +32,7 @@ namespace {
     bool runOnFunction(Function &F) {
       for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
            AI != AE; ++AI)
-        if (!AI->hasName() && AI->getType() != Type::VoidTy)
+        if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
           AI->setName("tmp");
 
       for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -40,7 +40,7 @@ namespace {
           BB->setName("BB");
         
         for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-          if (!I->hasName() && I->getType() != Type::VoidTy)
+          if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
             I->setName("tmp");
       }
       return true;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d5e7303..56e662e 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -33,22 +33,19 @@
 #include "llvm/Pass.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
-#include <algorithm>
-#include <map>
 using namespace llvm;
 
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 
 namespace {
-  struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+  struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
     LCSSA() : LoopPass(&ID) {}
 
@@ -57,12 +54,10 @@ namespace {
     DominatorTree *DT;
     std::vector<BasicBlock*> LoopBlocks;
     PredIteratorCache PredCache;
+    Loop *L;
     
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
-    void ProcessInstruction(Instruction* Instr,
-                            const SmallVector<BasicBlock*, 8>& exitBlocks);
-    
     /// This transformation requires natural loop information & requires that
     /// loop preheaders be inserted into the CFG.  It maintains both of these,
     /// as well as the CFG.  It also requires dominator information.
@@ -71,9 +66,9 @@ namespace {
       AU.setPreservesCFG();
       AU.addRequiredID(LoopSimplifyID);
       AU.addPreservedID(LoopSimplifyID);
-      AU.addRequired<LoopInfo>();
+      AU.addRequiredTransitive<LoopInfo>();
       AU.addPreserved<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<DominatorTree>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreserved<DominatorTree>();
 
@@ -85,15 +80,17 @@ namespace {
       AU.addPreserved<DominanceFrontier>();
     }
   private:
-    void getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                 const SmallVector<BasicBlock*, 8>& exitBlocks);
-
-    Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                            DenseMap<DomTreeNode*, Value*> &Phis);
+    bool ProcessInstruction(Instruction *Inst,
+                            const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+    
+    /// verifyAnalysis() - Verify loop nest.
+    virtual void verifyAnalysis() const {
+      // Check the special guarantees that LCSSA makes.
+      assert(L->isLCSSAForm() && "LCSSA form not preserved!");
+    }
 
     /// inLoop - returns true if the given block is within the current loop
-    bool inLoop(BasicBlock* B) {
+    bool inLoop(BasicBlock *B) const {
       return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
     }
   };
@@ -105,181 +102,163 @@ static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
 Pass *llvm::createLCSSAPass() { return new LCSSA(); }
 const PassInfo *const llvm::LCSSAID = &X;
 
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+                                 const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+                                 DominatorTree *DT) {
+  DomTreeNode *DomNode = DT->getNode(BB);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+      return true;
+
+  return false;
+}
+
+
 /// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
-  PredCache.clear();
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+  L = TheLoop;
   
   LI = &LPM.getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Speed up queries by creating a sorted list of blocks
+  // Get the set of exiting blocks.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  
+  if (ExitBlocks.empty())
+    return false;
+  
+  // Speed up queries by creating a sorted vector of blocks.
   LoopBlocks.clear();
   LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
-  std::sort(LoopBlocks.begin(), LoopBlocks.end());
+  array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
   
-  SmallVector<BasicBlock*, 8> exitBlocks;
-  L->getExitBlocks(exitBlocks);
+  // Look at all the instructions in the loop, checking to see if they have uses
+  // outside the loop.  If so, rewrite those uses.
+  bool MadeChange = false;
   
-  SetVector<Instruction*> AffectedValues;
-  getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks);
+  for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+       BBI != E; ++BBI) {
+    BasicBlock *BB = *BBI;
+    
+    // For large loops, avoid use-scanning by using dominance information:  In
+    // particular, if a block does not dominate any of the loop exits, then none
+    // of the values defined in the block could be used outside the loop.
+    if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+      continue;
+    
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      // Reject two common cases fast: instructions with no uses (like stores)
+      // and instructions with one use that is in the same block as this.
+      if (I->use_empty() ||
+          (I->hasOneUse() && I->use_back()->getParent() == BB &&
+           !isa<PHINode>(I->use_back())))
+        continue;
+      
+      MadeChange |= ProcessInstruction(I, ExitBlocks);
+    }
+  }
   
-  // If no values are affected, we can save a lot of work, since we know that
-  // nothing will be changed.
-  if (AffectedValues.empty())
-    return false;
+  assert(L->isLCSSAForm());
+  PredCache.clear();
+
+  return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+                        const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (ExitBlocks[i] == BB)
+      return true;
+  return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop.  If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+                               const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  SmallVector<Use*, 16> UsesToRewrite;
   
-  // Iterate over all affected values for this loop and insert Phi nodes
-  // for them in the appropriate exit blocks
+  BasicBlock *InstBB = Inst->getParent();
   
-  for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
-       E = AffectedValues.end(); I != E; ++I)
-    ProcessInstruction(*I, exitBlocks);
+  for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+       UI != E; ++UI) {
+    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(*UI))
+      UserBB = PN->getIncomingBlock(UI);
+    
+    if (InstBB != UserBB && !inLoop(UserBB))
+      UsesToRewrite.push_back(&UI.getUse());
+  }
   
-  assert(L->isLCSSAForm());
+  // If there are no uses outside the loop, exit with no change.
+  if (UsesToRewrite.empty()) return false;
   
-  return true;
-}
-
-/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes,
-/// eliminate all out-of-loop uses.
-void LCSSA::ProcessInstruction(Instruction *Instr,
-                               const SmallVector<BasicBlock*, 8>& exitBlocks) {
   ++NumLCSSA; // We are applying the transformation
 
-  // Keep track of the blocks that have the value available already.
-  DenseMap<DomTreeNode*, Value*> Phis;
-
-  BasicBlock *DomBB = Instr->getParent();
-
   // Invoke instructions are special in that their result value is not available
   // along their unwind edge. The code below tests to see whether DomBB dominates
   // the value, so adjust DomBB to the normal destination block, which is
   // effectively where the value is first usable.
-  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+  BasicBlock *DomBB = Inst->getParent();
+  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
     DomBB = Inv->getNormalDest();
 
   DomTreeNode *DomNode = DT->getNode(DomBB);
 
-  // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
-  // add them to the Phi's map.
-  for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(),
-      BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
-    BasicBlock *BB = *BBI;
-    DomTreeNode *ExitBBNode = DT->getNode(BB);
-    Value *&Phi = Phis[ExitBBNode];
-    if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
-      PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
-                                    BB->begin());
-      PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
-
-      // Remember that this phi makes the value alive in this block.
-      Phi = PN;
-
-      // Add inputs from inside the loop for this PHI.
-      for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI)
-        PN->addIncoming(Instr, *PI);
-    }
-  }
+  SSAUpdater SSAUpdate;
+  SSAUpdate.Initialize(Inst);
   
-  
-  // Record all uses of Instr outside the loop.  We need to rewrite these.  The
-  // LCSSA phis won't be included because they use the value in the loop.
-  for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
-       UI != E;) {
-    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-    if (PHINode *P = dyn_cast<PHINode>(*UI)) {
-      UserBB = P->getIncomingBlock(UI);
-    }
+  // Insert the LCSSA phi's into all of the exit blocks dominated by the
+  // value., and add them to the Phi's map.
+  for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+      BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+    BasicBlock *ExitBB = *BBI;
+    if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
     
-    // If the user is in the loop, don't rewrite it!
-    if (UserBB == Instr->getParent() || inLoop(UserBB)) {
-      ++UI;
-      continue;
-    }
+    // If we already inserted something for this BB, don't reprocess it.
+    if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
     
-    // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
-    // inserting PHI nodes into join points where needed.
-    Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
-    
-    // Preincrement the iterator to avoid invalidating it when we change the
-    // value.
-    Use &U = UI.getUse();
-    ++UI;
-    U.set(Val);
-  }
-}
+    PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+                                  ExitBB->begin());
+    PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
 
-/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
-/// are used by instructions outside of it.
-void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                const SmallVector<BasicBlock*, 8>& exitBlocks) {
-  // FIXME: For large loops, we may be able to avoid a lot of use-scanning
-  // by using dominance information.  In particular, if a block does not
-  // dominate any of the loop exits, then none of the values defined in the
-  // block could be used outside the loop.
-  for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
-       BB != BE; ++BB) {
-    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
-           ++UI) {
-        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-        if (PHINode* p = dyn_cast<PHINode>(*UI)) {
-          UserBB = p->getIncomingBlock(UI);
-        }
-        
-        if (*BB != UserBB && !inLoop(UserBB)) {
-          AffectedValues.insert(I);
-          break;
-        }
-      }
+    // Add inputs from inside the loop for this PHI.
+    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+      PN->addIncoming(Inst, *PI);
+    
+    // Remember that this phi makes the value alive in this block.
+    SSAUpdate.AddAvailableValue(ExitBB, PN);
   }
-}
-
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                               DenseMap<DomTreeNode*, Value*> &Phis) {
-  // If there is no dominator info for this BB, it is unreachable.
-  if (BB == 0)
-    return UndefValue::get(OrigInst->getType());
-                                 
-  // If we have already computed this value, return the previously computed val.
-  if (Phis.count(BB)) return Phis[BB];
-
-  DomTreeNode *IDom = BB->getIDom();
+  
+  // Rewrite all uses outside the loop in terms of the new PHIs we just
+  // inserted.
+  for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+    // If this use is in an exit block, rewrite to use the newly inserted PHI.
+    // This is required for correctness because SSAUpdate doesn't handle uses in
+    // the same block.  It assumes the PHI we inserted is at the end of the
+    // block.
+    Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+    BasicBlock *UserBB = User->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(User))
+      UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
 
-  // Otherwise, there are two cases: we either have to insert a PHI node or we
-  // don't.  We need to insert a PHI node if this block is not dominated by one
-  // of the exit nodes from the loop (the loop could have multiple exits, and
-  // though the value defined *inside* the loop dominated all its uses, each
-  // exit by itself may not dominate all the uses).
-  //
-  // The simplest way to check for this condition is by checking to see if the
-  // idom is in the loop.  If so, we *know* that none of the exit blocks
-  // dominate this block.  Note that we *know* that the block defining the
-  // original instruction is in the idom chain, because if it weren't, then the
-  // original value didn't dominate this use.
-  if (!inLoop(IDom->getBlock())) {
-    // Idom is not in the loop, we must still be "below" the exit block and must
-    // be fully dominated by the value live in the idom.
-    Value* val = GetValueForBlock(IDom, OrigInst, Phis);
-    Phis.insert(std::make_pair(BB, val));
-    return val;
+    if (isa<PHINode>(UserBB->begin()) &&
+        isExitBlock(UserBB, ExitBlocks)) {
+      UsesToRewrite[i]->set(UserBB->begin());
+      continue;
+    }
+    
+    // Otherwise, do full PHI insertion.
+    SSAUpdate.RewriteUse(*UsesToRewrite[i]);
   }
   
-  BasicBlock *BBN = BB->getBlock();
-  
-  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
-  // now, then get values to fill in the incoming values for the PHI.
-  PHINode *PN = PHINode::Create(OrigInst->getType(),
-                                OrigInst->getName() + ".lcssa", BBN->begin());
-  PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
-  Phis.insert(std::make_pair(BB, PN));
-                                 
-  // Fill in the incoming values for the block.
-  for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
-    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
-  return PN;
+  return true;
 }
 
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c08638..b622611 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,9 +20,11 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
@@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
     } else if (SI->getNumSuccessors() == 2) {
       // Otherwise, we can fold this switch into a conditional branch
       // instruction if it has only one non-default destination.
-      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
-                                 SI->getSuccessorValue(1), "cond", SI);
+      Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
+                                 SI->getSuccessorValue(1), "cond");
       // Insert the new branch...
       BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
 
@@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
 /// too, recursively.
 void
 llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
-
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
@@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
 /// between them, moving the instructions in the predecessor into DestBB and
 /// deleting the predecessor block.
 ///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // If BB has single-entry PHI nodes, fold them.
   while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
     Value *NewVal = PN->getIncomingValue(0);
@@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
   
+  if (P) {
+    ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+    if (PI) {
+      PI->replaceAllUses(PredBB, DestBB);
+      PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+    }
+  }
   // Nuke BB.
   PredBB->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index d6b167f..c22708a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,10 +37,12 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
@@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
 STATISTIC(NumNested  , "Number of nested loops split out");
 
 namespace {
-  struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+  struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : FunctionPass(&ID) {}
+    LoopSimplify() : LoopPass(&ID) {}
 
     // AA - If we have an alias analysis object to update, this is it, otherwise
     // this is null.
     AliasAnalysis *AA;
     LoopInfo *LI;
     DominatorTree *DT;
-    virtual bool runOnFunction(Function &F);
+    Loop *L;
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       // We need loop information to identify the loops...
-      AU.addRequired<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<LoopInfo>();
+      AU.addRequiredTransitive<DominatorTree>();
 
       AU.addPreserved<LoopInfo>();
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
     }
 
     /// verifyAnalysis() - Verify loop nest.
     void verifyAnalysis() const {
-#ifndef NDEBUG
-      LoopInfo *NLI = &getAnalysis<LoopInfo>();
-      for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I) 
-        (*I)->verifyLoop();
-#endif  
+      assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
     }
 
   private:
-    bool ProcessLoop(Loop *L);
+    bool ProcessLoop(Loop *L, LPPassManager &LPM);
     BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
-    void InsertPreheaderForLoop(Loop *L);
-    Loop *SeparateNestedLoop(Loop *L);
-    void InsertUniqueBackedgeBlock(Loop *L);
+    BasicBlock *InsertPreheaderForLoop(Loop *L);
+    Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+    void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
     void PlaceSplitBlockCarefully(BasicBlock *NewBB,
                                   SmallVectorImpl<BasicBlock*> &SplitPreds,
                                   Loop *L);
@@ -105,73 +105,19 @@ X("loopsimplify", "Canonicalize natural loops", true);
 
 // Publically exposed interface to pass...
 const PassInfo *const llvm::LoopSimplifyID = &X;
-FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
 /// runOnFunction - Run down all loops in the CFG (recursively, but we could do
 /// it in any convenient order) inserting preheaders...
 ///
-bool LoopSimplify::runOnFunction(Function &F) {
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+  L = l;
   bool Changed = false;
   LI = &getAnalysis<LoopInfo>();
   AA = getAnalysisIfAvailable<AliasAnalysis>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Check to see that no blocks (other than the header) in loops have
-  // predecessors that are not in loops.  This is not valid for natural loops,
-  // but can occur if the blocks are unreachable.  Since they are unreachable we
-  // can just shamelessly destroy their terminators to make them not branch into
-  // the loop!
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    // This case can only occur for unreachable blocks.  Blocks that are
-    // unreachable can't be in loops, so filter those blocks out.
-    if (LI->getLoopFor(BB)) continue;
-    
-    bool BlockUnreachable = false;
-    TerminatorInst *TI = BB->getTerminator();
-
-    // Check to see if any successors of this block are non-loop-header loops
-    // that are not the header.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
-      // If this successor is not in a loop, BB is clearly ok.
-      Loop *L = LI->getLoopFor(TI->getSuccessor(i));
-      if (!L) continue;
-      
-      // If the succ is the loop header, and if L is a top-level loop, then this
-      // is an entrance into a loop through the header, which is also ok.
-      if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
-        continue;
-      
-      // Otherwise, this is an entrance into a loop from some place invalid.
-      // Either the loop structure is invalid and this is not a natural loop (in
-      // which case the compiler is buggy somewhere else) or BB is unreachable.
-      BlockUnreachable = true;
-      break;
-    }
-    
-    // If this block is ok, check the next one.
-    if (!BlockUnreachable) continue;
-    
-    // Otherwise, this block is dead.  To clean up the CFG and to allow later
-    // loop transformations to ignore this case, we delete the edges into the
-    // loop by replacing the terminator.
-    
-    // Remove PHI entries from the successors.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-      TI->getSuccessor(i)->removePredecessor(BB);
-   
-    // Add a new unreachable instruction before the old terminator.
-    new UnreachableInst(TI);
-    
-    // Delete the dead terminator.
-    if (AA) AA->deleteValue(TI);
-    if (!TI->use_empty())
-      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
-    TI->eraseFromParent();
-    Changed |= true;
-  }
-  
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
+  Changed |= ProcessLoop(L, LPM);
 
   return Changed;
 }
@@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) {
 /// ProcessLoop - Walk the loop structure in depth first order, ensuring that
 /// all loops have preheaders.
 ///
-bool LoopSimplify::ProcessLoop(Loop *L) {
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
   bool Changed = false;
 ReprocessLoop:
-  
-  // Canonicalize inner loops before outer loops.  Inner loop canonicalization
-  // can provide work for the outer loop to canonicalize.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
-  
-  assert(L->getBlocks()[0] == L->getHeader() &&
-         "Header isn't first block in loop?");
+
+  // Check to see that no blocks (other than the header) in this loop that has
+  // predecessors that are not in the loop.  This is not valid for natural
+  // loops, but can occur if the blocks are unreachable.  Since they are
+  // unreachable we can just shamelessly delete those CFG edges!
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    if (*BB == L->getHeader()) continue;
+
+    SmallPtrSet<BasicBlock *, 4> BadPreds;
+    for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI)
+      if (!L->contains(*PI))
+        BadPreds.insert(*PI);
+
+    // Delete each unique out-of-loop (and thus dead) predecessor.
+    for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
+         E = BadPreds.end(); I != E; ++I) {
+      // Inform each successor of each dead pred.
+      for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+        (*SI)->removePredecessor(*I);
+      // Zap the dead pred's terminator and replace it with unreachable.
+      TerminatorInst *TI = (*I)->getTerminator();
+       TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+      (*I)->getTerminator()->eraseFromParent();
+      new UnreachableInst((*I)->getContext(), *I);
+      Changed = true;
+    }
+  }
 
   // Does the loop already have a preheader?  If so, don't insert one.
-  if (L->getLoopPreheader() == 0) {
-    InsertPreheaderForLoop(L);
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    Preheader = InsertPreheaderForLoop(L);
     NumInserted++;
     Changed = true;
   }
@@ -229,10 +196,9 @@ ReprocessLoop:
     // this for loops with a giant number of backedges, just factor them into a
     // common backedge instead.
     if (NumBackedges < 8) {
-      if (Loop *NL = SeparateNestedLoop(L)) {
+      if (SeparateNestedLoop(L, LPM)) {
         ++NumNested;
         // This is a big restructuring change, reprocess the whole loop.
-        ProcessLoop(NL);
         Changed = true;
         // GCC doesn't tail recursion eliminate this.
         goto ReprocessLoop;
@@ -242,7 +208,7 @@ ReprocessLoop:
     // If we either couldn't, or didn't want to, identify nesting of the loops,
     // insert a new block that all backedges target, then make it jump to the
     // loop header.
-    InsertUniqueBackedgeBlock(L);
+    InsertUniqueBackedgeBlock(L, Preheader);
     NumInserted++;
     Changed = true;
   }
@@ -253,7 +219,7 @@ ReprocessLoop:
   PHINode *PN;
   for (BasicBlock::iterator I = L->getHeader()->begin();
        (PN = dyn_cast<PHINode>(I++)); )
-    if (Value *V = PN->hasConstantValue()) {
+    if (Value *V = PN->hasConstantValue(DT)) {
       if (AA) AA->deleteValue(PN);
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
@@ -286,19 +252,10 @@ ReprocessLoop:
         Instruction *Inst = I++;
         if (Inst == CI)
           continue;
-        if (Inst->isTrapping()) {
+        if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
           AllInvariant = false;
           break;
         }
-        for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
-          if (!L->isLoopInvariant(Inst->getOperand(j))) {
-            AllInvariant = false;
-            break;
-          }
-        if (!AllInvariant)
-          break;
-        // Hoist.
-        Inst->moveBefore(L->getLoopPreheader()->getTerminator());
       }
       if (!AllInvariant) continue;
 
@@ -317,9 +274,10 @@ ReprocessLoop:
       DomTreeNode *Node = DT->getNode(ExitingBlock);
       const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
         Node->getChildren();
-      for (unsigned k = 0, g = Children.size(); k != g; ++k) {
-        DT->changeImmediateDominator(Children[k], Node->getIDom());
-        if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+      while (!Children.empty()) {
+        DomTreeNode *Child = Children.front();
+        DT->changeImmediateDominator(Child, Node->getIDom());
+        if (DF) DF->changeImmediateDominator(Child->getBlock(),
                                              Node->getIDom()->getBlock(),
                                              DT);
       }
@@ -339,7 +297,7 @@ ReprocessLoop:
 /// preheader, this method is called to insert one.  This method has two phases:
 /// preheader insertion and analysis updating.
 ///
-void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
   BasicBlock *Header = L->getHeader();
 
   // Compute the set of predecessors of the loop that are not in the loop.
@@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
   BasicBlock *NewBB =
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);
-  
-
-  //===--------------------------------------------------------------------===//
-  //  Update analysis results now that we have performed the transformation
-  //
-
-  // We know that we have loop information to update... update it now.
-  if (Loop *Parent = L->getParentLoop())
-    Parent->addBasicBlockToLoop(NewBB, LI->getBase());
 
   // Make sure that NewBB is put someplace intelligent, which doesn't mess up
   // code layout too horribly.
   PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+  return NewBB;
 }
 
 /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
                                              LoopBlocks.size(), ".loopexit",
                                              this);
 
-  // Update Loop Information - we know that the new block will be in whichever
-  // loop the Exit block is in.  Note that it may not be in that immediate loop,
-  // if the successor is some other loop header.  In that case, we continue 
-  // walking up the loop tree to find a loop that contains both the successor
-  // block and the predecessor block.
-  Loop *SuccLoop = LI->getLoopFor(Exit);
-  while (SuccLoop && !SuccLoop->contains(L->getHeader()))
-    SuccLoop = SuccLoop->getParentLoop();
-  if (SuccLoop)
-    SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase());
-
   return NewBB;
 }
 
@@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I);
     ++I;
-    if (Value *V = PN->hasConstantValue())
-      if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) {
-        // This is a degenerate PHI already, don't modify it!
-        PN->replaceAllUsesWith(V);
-        if (AA) AA->deleteValue(PN);
-        PN->eraseFromParent();
-        continue;
-      }
+    if (Value *V = PN->hasConstantValue(DT)) {
+      // This is a degenerate PHI already, don't modify it!
+      PN->replaceAllUsesWith(V);
+      if (AA) AA->deleteValue(PN);
+      PN->eraseFromParent();
+      continue;
+    }
 
     // Scan this PHI node looking for a use of the PHI node by itself.
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
 /// If we are able to separate out a loop, return the new outer loop that was
 /// created.
 ///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
   PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
   if (PN == 0) return 0;  // No known way to partition.
 
@@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
   else
     LI->changeTopLevelLoop(L, NewOuter);
 
-  // This block is going to be our new header block: add it to this loop and all
-  // parent loops.
-  NewOuter->addBasicBlockToLoop(NewBB, LI->getBase());
-
   // L is now a subloop of our outer loop.
   NewOuter->addChildLoop(L);
 
+  // Add the new loop to the pass manager queue.
+  LPM.insertLoopIntoQueue(NewOuter);
+
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I)
     NewOuter->addBlockEntry(*I);
 
+  // Now reset the header in L, which had been moved by
+  // SplitBlockPredecessors for the outer loop.
+  L->moveToHeader(Header);
+
   // Determine which blocks should stay in L and which should be moved out to
   // the Outer loop now.
   std::set<BasicBlock*> BlocksInL;
@@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
 /// backedges to target a new basic block and have that block branch to the loop
 /// header.  This ensures that loops have exactly one backedge.
 ///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
+void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
   assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
 
   // Get information about the loop
-  BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *Header = L->getHeader();
   Function *F = Header->getParent();
 
@@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
     if (*I != Preheader) BackedgeBlocks.push_back(*I);
 
   // Create and insert the new backedge block...
-  BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F);
+  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+                                           Header->getName()+".backedge", F);
   BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
 
   // Move the new backedge block to right after the last backedge block.
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 74e7028..f26d7c1 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -19,6 +19,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Target/TargetData.h"
@@ -28,17 +29,17 @@ using namespace llvm;
 STATISTIC(NumLowered, "Number of allocations lowered");
 
 namespace {
-  /// LowerAllocations - Turn malloc and free instructions into %malloc and
-  /// %free calls.
+  /// LowerAllocations - Turn malloc and free instructions into @malloc and
+  /// @free calls.
   ///
   class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
-    Constant *MallocFunc;   // Functions in the module we are processing
-    Constant *FreeFunc;     // Initialized by doInitialization
+    Constant *FreeFunc;   // Functions in the module we are processing
+                          // Initialized by doInitialization
     bool LowerMallocArgToInteger;
   public:
     static char ID; // Pass ID, replacement for typeid
     explicit LowerAllocations(bool LowerToInt = false)
-      : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0), 
+      : BasicBlockPass(&ID), FreeFunc(0), 
         LowerMallocArgToInteger(LowerToInt) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
 // This function is always successful.
 //
 bool LowerAllocations::doInitialization(Module &M) {
-  const Type *BPTy = PointerType::getUnqual(Type::Int8Ty);
-  // Prototype malloc as "char* malloc(...)", because we don't know in
-  // doInitialization whether size_t is int or long.
-  FunctionType *FT = FunctionType::get(BPTy, true);
-  MallocFunc = M.getOrInsertFunction("malloc", FT);
-  FreeFunc = M.getOrInsertFunction("free"  , Type::VoidTy, BPTy, (Type *)0);
+  const Type *BPTy = Type::getInt8PtrTy(M.getContext());
+  FreeFunc = M.getOrInsertFunction("free"  , Type::getVoidTy(M.getContext()),
+                                   BPTy, (Type *)0);
   return true;
 }
 
@@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) {
 //
 bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
   bool Changed = false;
-  assert(MallocFunc && FreeFunc && "Pass not initialized!");
+  assert(FreeFunc && "Pass not initialized!");
 
   BasicBlock::InstListType &BBIL = BB.getInstList();
 
   const TargetData &TD = getAnalysis<TargetData>();
-  const Type *IntPtrTy = TD.getIntPtrType();
+  const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());
 
   // Loop over all of the instructions, looking for malloc or free instructions
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
     if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
-      const Type *AllocTy = MI->getType()->getElementType();
-
-      // malloc(type) becomes i8 *malloc(size)
-      Value *MallocArg;
-      if (LowerMallocArgToInteger)
-        MallocArg = ConstantInt::get(Type::Int64Ty,
-                                     TD.getTypeAllocSize(AllocTy));
-      else
-        MallocArg = ConstantExpr::getSizeOf(AllocTy);
-      MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg), 
-                                                  IntPtrTy);
-
-      if (MI->isArrayAllocation()) {
-        if (isa<ConstantInt>(MallocArg) &&
-            cast<ConstantInt>(MallocArg)->isOne()) {
-          MallocArg = MI->getOperand(0);         // Operand * 1 = Operand
-        } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
-          CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
-          MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
-        } else {
-          Value *Scale = MI->getOperand(0);
-          if (Scale->getType() != IntPtrTy)
-            Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
-                                                "", I);
-
-          // Multiply it by the array size if necessary...
-          MallocArg = BinaryOperator::Create(Instruction::Mul, Scale,
-                                             MallocArg, "", I);
-        }
-      }
-
-      // Create the call to Malloc.
-      CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I);
-      MCall->setTailCall();
-
-      // Create a cast instruction to convert to the right type...
-      Value *MCast;
-      if (MCall->getType() != Type::VoidTy)
-        MCast = new BitCastInst(MCall, MI->getType(), "", I);
-      else
-        MCast = Constant::getNullValue(MI->getType());
+      Value *ArraySize = MI->getOperand(0);
+      if (ArraySize->getType() != IntPtrTy)
+        ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
+                                                false /*ZExt*/, "", I);
+      Value *MCast = CallInst::CreateMalloc(I, IntPtrTy,
+                                            MI->getAllocatedType(), ArraySize);
 
       // Replace all uses of the old malloc inst with the cast inst
       MI->replaceAllUsesWith(MCast);
@@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
     } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
       Value *PtrCast = 
         new BitCastInst(FI->getOperand(0),
-                        PointerType::getUnqual(Type::Int8Ty), "", I);
+               Type::getInt8PtrTy(BB.getContext()), "", I);
 
       // Insert a call to the free function...
       CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 1f6b1a2..9a3de26 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -40,6 +40,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
 // doInitialization - Make sure that there is a prototype for abort in the
 // current module.
 bool LowerInvoke::doInitialization(Module &M) {
-  const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type *VoidPtrTy =
+          Type::getInt8PtrTy(M.getContext());
   AbortMessage = 0;
   if (ExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
@@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) {
     { // The type is recursive, so use a type holder.
       std::vector<const Type*> Elements;
       Elements.push_back(JmpBufTy);
-      OpaqueType *OT = OpaqueType::get();
+      OpaqueType *OT = OpaqueType::get(M.getContext());
       Elements.push_back(PointerType::getUnqual(OT));
-      PATypeHolder JBLType(StructType::get(Elements));
+      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
       OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.
       JBLinkTy = JBLType.get();
       M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
@@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) {
     // Now that we've done that, insert the jmpbuf list head global, unless it
     // already exists.
     if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
-      JBListHead = new GlobalVariable(PtrJBList, false,
+      JBListHead = new GlobalVariable(M, PtrJBList, false,
                                       GlobalValue::LinkOnceAnyLinkage,
                                       Constant::getNullValue(PtrJBList),
-                                      "llvm.sjljeh.jblist", &M);
+                                      "llvm.sjljeh.jblist");
     }
 
 // VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
@@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) {
   }
 
   // We need the 'write' and 'abort' functions for both models.
-  AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0);
+  AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+                                  (Type *)0);
 #if 0 // "write" is Unix-specific.. code is going away soon anyway.
   WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
                                   VoidPtrTy, Type::Int32Ty, (Type *)0);
@@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) {
     // The abort message for expensive EH support tells the user that the
     // program 'unwound' without an 'invoke' instruction.
     Constant *Msg =
-      ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+      ConstantArray::get(M->getContext(),
+                         "ERROR: Exception thrown, but not caught!\n");
     AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
 
-    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
                                                GlobalValue::InternalLinkage,
-                                               Msg, "abortmsg", M);
-    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+                                               Msg, "abortmsg");
+    std::vector<Constant*> GEPIdx(2,
+                     Constant::getNullValue(Type::getInt32Ty(M->getContext())));
     AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
   } else {
     // The abort message for cheap EH support tells the user that EH is not
     // enabled.
     Constant *Msg =
-      ConstantArray::get("Exception handler needed, but not enabled.  Recompile"
-                         " program with -enable-correct-eh-support.\n");
+      ConstantArray::get(M->getContext(), 
+                        "Exception handler needed, but not enabled."      
+                        "Recompile program with -enable-correct-eh-support.\n");
     AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
 
-    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
                                                GlobalValue::InternalLinkage,
-                                               Msg, "abortmsg", M);
-    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+                                               Msg, "abortmsg");
+    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(
+                                            Type::getInt32Ty(M->getContext())));
     AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
   }
 }
@@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
 
       // Insert a return instruction.  This really should be a "barrier", as it
       // is unreachable.
-      ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 :
-                         Constant::getNullValue(F.getReturnType()), UI);
+      ReturnInst::Create(F.getContext(),
+                         F.getReturnType() == Type::getVoidTy(F.getContext()) ?
+                          0 : Constant::getNullValue(F.getReturnType()), UI);
 
       // Remove the unwind instruction now.
       BB->getInstList().erase(UI);
@@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
 void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
                                          AllocaInst *InvokeNum,
                                          SwitchInst *CatchSwitch) {
-  ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+  ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                            InvokeNo);
 
   // If the unwind edge has phi nodes, split the edge.
   if (isa<PHINode>(II->getUnwindDest()->begin())) {
@@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
 
   BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
   // nonvolatile.
-  new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+  new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), 
+                InvokeNum, false, NI);
 
   // Add a switch case to our unwind block.
   CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
@@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     // alloca because the value needs to be live across invokes.
     unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
     AllocaInst *JmpBuf =
-      new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
+      new AllocaInst(JBLinkTy, 0, Align,
+                     "jblink", F.begin()->begin());
 
     std::vector<Value*> Idx;
-    Idx.push_back(Constant::getNullValue(Type::Int32Ty));
-    Idx.push_back(ConstantInt::get(Type::Int32Ty, 1));
+    Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+    Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));
     OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
-                                             "OldBuf", EntryBB->getTerminator());
+                                             "OldBuf",
+                                              EntryBB->getTerminator());
 
     // Copy the JBListHead to the alloca.
     Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
@@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
 
     // Create the catch block.  The catch block is basically a big switch
     // statement that goes to all of the invoke catch blocks.
-    BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F);
+    BasicBlock *CatchBB =
+            BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
 
     // Create an alloca which keeps track of which invoke is currently
     // executing.  For normal calls it contains zero.
-    AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum",
-                                           EntryBB->begin());
-    new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true,
-                  EntryBB->getTerminator());
+    AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+                                           "invokenum",EntryBB->begin());
+    new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), 
+                  InvokeNum, true, EntryBB->getTerminator());
 
     // Insert a load in the Catch block, and a switch on its value.  By default,
     // we go to a block that just does an unwind (which is the correct action
     // for a standard call).
-    BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F);
-    Unwinds.push_back(new UnwindInst(UnwindBB));
+    BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
 
     Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
     SwitchInst *CatchSwitch =
@@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
                                                      "setjmp.cont");
 
-    Idx[1] = ConstantInt::get(Type::Int32Ty, 0);
+    Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
     Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
                                                  "TheJmpBuf",
                                                  EntryBB->getTerminator());
-    JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty),
+    JmpBufPtr = new BitCastInst(JmpBufPtr,
+                        Type::getInt8PtrTy(F.getContext()),
                                 "tmp", EntryBB->getTerminator());
     Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
                                     EntryBB->getTerminator());
 
     // Compare the return value to zero.
-    Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet,
+    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+                                   ICmpInst::ICMP_EQ, SJRet,
                                    Constant::getNullValue(SJRet->getType()),
-      "notunwind", EntryBB->getTerminator());
+                                   "notunwind");
     // Nuke the uncond branch.
     EntryBB->getTerminator()->eraseFromParent();
 
@@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
   // Create three new blocks, the block to load the jmpbuf ptr and compare
   // against null, the block to do the longjmp, and the error block for if it
   // is null.  Add them at the end of the function because they are not hot.
-  BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F);
-  BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F);
-  BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F);
+  BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+                                                "dounwind", &F);
+  BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+  BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
 
   // If this function contains an invoke, restore the old jumpbuf ptr.
   Value *BufPtr;
@@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
   }
 
   // Load the JBList, if it's null, then there was no catch!
-  Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr,
+  Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
                                 Constant::getNullValue(BufPtr->getType()),
-    "notnull", UnwindHandler);
+                                "notnull");
   BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
 
   // Create the block to do the longjmp.
   // Get a pointer to the jmpbuf and longjmp.
   std::vector<Value*> Idx;
-  Idx.push_back(Constant::getNullValue(Type::Int32Ty));
-  Idx.push_back(ConstantInt::get(Type::Int32Ty, 0));
+  Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+  Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));
   Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
                                      UnwindBlock);
-  Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty),
+  Idx[0] = new BitCastInst(Idx[0],
+             Type::getInt8PtrTy(F.getContext()),
                            "tmp", UnwindBlock);
-  Idx[1] = ConstantInt::get(Type::Int32Ty, 1);
+  Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
   CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
-  new UnreachableInst(UnwindBlock);
+  new UnreachableInst(F.getContext(), UnwindBlock);
 
   // Set up the term block ("throw without a catch").
-  new UnreachableInst(TermBlock);
+  new UnreachableInst(F.getContext(), TermBlock);
 
   // Insert a new call to write(2, AbortMessage, AbortMessageLength);
   writeAbortMessage(TermBlock->getTerminator());
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1da5936..764f098 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) {
 
 // operator<< - Used for debugging purposes.
 //
-static std::ostream& operator<<(std::ostream &O,
-                                const LowerSwitch::CaseVector &C) {
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C) {
   O << "[";
 
   for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
@@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O,
   return O << "]";
 }
 
-static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) {
-  if (O.stream()) *O.stream() << C;
-  return O;
-}
-
 // switchConvert - Convert the switch statement into a binary lookup of
 // the case values. The function recursively builds this tree.
 //
@@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
 
   unsigned Mid = Size / 2;
   std::vector<CaseRange> LHS(Begin, Begin + Mid);
-  DOUT << "LHS: " << LHS << "\n";
+  DEBUG(errs() << "LHS: " << LHS << "\n");
   std::vector<CaseRange> RHS(Begin + Mid, End);
-  DOUT << "RHS: " << RHS << "\n";
+  DEBUG(errs() << "RHS: " << RHS << "\n");
 
   CaseRange& Pivot = *(Begin + Mid);
   DEBUG(errs() << "Pivot ==> " 
@@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
   // Create a new node that checks if the value is < pivot. Go to the
   // left branch if it is and right branch if not.
   Function* F = OrigBlock->getParent();
-  BasicBlock* NewNode = BasicBlock::Create("NodeBlock");
+  BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
   Function::iterator FI = OrigBlock;
   F->getBasicBlockList().insert(++FI, NewNode);
 
-  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+                                Val, Pivot.Low, "Pivot");
   NewNode->getInstList().push_back(Comp);
   BranchInst::Create(LBranch, RBranch, Comp, NewNode);
   return NewNode;
@@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                       BasicBlock* Default)
 {
   Function* F = OrigBlock->getParent();
-  BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock");
+  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
   Function::iterator FI = OrigBlock;
   F->getBasicBlockList().insert(++FI, NewLeaf);
 
@@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   ICmpInst* Comp = NULL;
   if (Leaf.Low == Leaf.High) {
     // Make the seteq instruction...
-    Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low,
-                        "SwitchLeaf", NewLeaf);
+    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+                        Leaf.Low, "SwitchLeaf");
   } else {
     // Make range comparison
     if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
       // Val >= Min && Val <= Hi --> Val <= Hi
-      Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High,
-                          "SwitchLeaf", NewLeaf);
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+                          "SwitchLeaf");
     } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
       // Val >= 0 && Val <= Hi --> Val <=u Hi
-      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High,
-                          "SwitchLeaf", NewLeaf);      
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+                          "SwitchLeaf");      
     } else {
       // Emit V-Lo <=u Hi-Lo
       Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
@@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                                    Val->getName()+".off",
                                                    NewLeaf);
       Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
-      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound,
-                          "SwitchLeaf", NewLeaf);
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+                          "SwitchLeaf");
     }
   }
 
@@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
 
   // Create a new, empty default block so that the new hierarchy of
   // if-then statements go to this and the PHI nodes are happy.
-  BasicBlock* NewDefault = BasicBlock::Create("NewDefault");
+  BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
   F->getBasicBlockList().insert(Default, NewDefault);
 
   BranchInst::Create(Default, NewDefault);
@@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   CaseVector Cases;
   unsigned numCmps = Clusterify(Cases, SI);
 
-  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
-       << ". Total compares: " << numCmps << "\n";
-  DOUT << "Cases: " << Cases << "\n";
+  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << "\n");
+  DEBUG(errs() << "Cases: " << Cases << "\n");
+  (void)numCmps;
   
   BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
                                           OrigBlock, NewDefault);
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 2b06d77..5df0832 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT, DF);
+    PromoteMemToReg(Allocas, DT, DF, F.getContext());
     NumPromoted += Allocas.size();
     Changed = true;
   }
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b717699..9ca06bd 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -23,13 +23,13 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
@@ -41,7 +41,6 @@ STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
 
-// Provide DenseMapInfo for all pointers.
 namespace llvm {
 template<>
 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
@@ -181,6 +180,8 @@ namespace {
     /// AST - An AliasSetTracker object to update.  If null, don't update it.
     ///
     AliasSetTracker *AST;
+    
+    LLVMContext &Context;
 
     /// AllocaLookup - Reverse mapping of Allocas.
     ///
@@ -212,8 +213,9 @@ namespace {
     DenseMap<const BasicBlock*, unsigned> BBNumPreds;
   public:
     PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
-                   DominanceFrontier &df, AliasSetTracker *ast)
-      : Allocas(A), DT(dt), DF(df), AST(ast) {}
+                   DominanceFrontier &df, AliasSetTracker *ast,
+                   LLVMContext &C)
+      : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}
 
     void run();
 
@@ -291,10 +293,9 @@ namespace {
       // As we scan the uses of the alloca instruction, keep track of stores,
       // and decide whether all of the loads and stores to the alloca are within
       // the same basic block.
-      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
-           U != E;)  {
-        Instruction *User = cast<Instruction>(*U);
-        ++U;
+      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+           UI != E;)  {
+        Instruction *User = cast<Instruction>(*UI++);
         if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
           // Remove any uses of this alloca in DbgInfoInstrinsics.
           assert(BC->hasOneUse() && "Unexpected alloca uses!");
@@ -303,7 +304,8 @@ namespace {
           BC->eraseFromParent();
           continue;
         } 
-        else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+        
+        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
           // Remember the basic blocks which define new values for the alloca
           DefiningBlocks.push_back(SI->getParent());
           AllocaPointerVal = SI->getOperand(0);
@@ -491,17 +493,14 @@ void PromoteMem2Reg::run() {
       PHINode *PN = I->second;
       
       // If this PHI node merges one value and/or undefs, get the value.
-      if (Value *V = PN->hasConstantValue(true)) {
-        if (!isa<Instruction>(V) ||
-            properlyDominates(cast<Instruction>(V), PN)) {
-          if (AST && isa<PointerType>(PN->getType()))
-            AST->deleteValue(PN);
-          PN->replaceAllUsesWith(V);
-          PN->eraseFromParent();
-          NewPhiNodes.erase(I++);
-          EliminatedAPHI = true;
-          continue;
-        }
+      if (Value *V = PN->hasConstantValue(&DT)) {
+        if (AST && isa<PointerType>(PN->getType()))
+          AST->deleteValue(PN);
+        PN->replaceAllUsesWith(V);
+        PN->eraseFromParent();
+        NewPhiNodes.erase(I++);
+        EliminatedAPHI = true;
+        continue;
       }
       ++I;
     }
@@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
         LiveInBlockWorklist.pop_back();
         --i, --e;
         break;
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      }
+      
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
         if (LI->getOperand(0) != AI) continue;
         
         // Okay, we found a load before a store to the alloca.  It is actually
@@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
   }
 }
 
+namespace {
 
 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the
 /// first element of a pair.
@@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate {
   }
 };
 
+}
+
 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
 /// block.  If this is the case, avoid traversing the CFG and inserting a lot of
 /// potentially useless PHI nodes by just performing a single linear pass over
@@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
   // Create a PhiNode using the dereferenced type... and add the phi-node to the
   // BasicBlock.
   PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
-                       Allocas[AllocaNo]->getName() + "." +
-                       utostr(Version++), BB->begin());
+                       Allocas[AllocaNo]->getName() + "." + Twine(Version++), 
+                       BB->begin());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
   PN->reserveOperandSpace(getNumPreds(BB));
@@ -995,9 +999,9 @@ NextIteration:
 ///
 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
                            DominatorTree &DT, DominanceFrontier &DF,
-                           AliasSetTracker *AST) {
+                           LLVMContext &Context, AliasSetTracker *AST) {
   // If there is nothing to do, bail out...
   if (Allocas.empty()) return;
 
-  PromoteMem2Reg(Allocas, DT, DF, AST).run();
+  PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();
 }
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 0000000..780ee26
--- /dev/null
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,335 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
+                IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+  return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+  : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+  delete &getAvailableVals(AV);
+  delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates.  ProtoValue is the value used to name PHI nodes.
+void SSAUpdater::Initialize(Value *ProtoValue) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+  
+  if (IPI == 0)
+    IPI = new IncomingPredInfoTy();
+  else
+    getIncomingPredInfo(IPI).clear();
+  PrototypeValue = ProtoValue;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+  assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
+  assert(PrototypeValue->getType() == V->getType() &&
+         "All rewritten values must have the same type");
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  Value *Res = GetValueAtEndOfBlockInternal(BB);
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB.  Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks.  However, the use of X happens in the *middle* of
+/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!getAvailableVals(AV).count(BB))
+    return GetValueAtEndOfBlock(BB);
+  
+  // Otherwise, we have the hard case.  Get the live-in values for each
+  // predecessor.
+  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+  Value *SingularValue = 0;
+  
+  // We can get our predecessor info by walking the pred_iterator list, but it
+  // is relatively slow.  If we already have PHI nodes in this block, walk one
+  // of them to get the predecessor list instead.
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+      Value *PredVal = GetValueAtEndOfBlock(PredBB);
+      PredValues.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (i == 0)
+        SingularValue = PredVal;
+      else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  } else {
+    bool isFirstPred = true;
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *PredBB = *PI;
+      Value *PredVal = GetValueAtEndOfBlock(PredBB);
+      PredValues.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (isFirstPred) {
+        SingularValue = PredVal;
+        isFirstPred = false;
+      } else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  }
+  
+  // If there are no predecessors, just return undef.
+  if (PredValues.empty())
+    return UndefValue::get(PrototypeValue->getType());
+  
+  // Otherwise, if all the merged values are the same, just use it.
+  if (SingularValue != 0)
+    return SingularValue;
+  
+  // Otherwise, we do need a PHI: insert one now.
+  PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
+                                         PrototypeValue->getName(),
+                                         &BB->front());
+  InsertedPHI->reserveOperandSpace(PredValues.size());
+  
+  // Fill in all the predecessors of the PHI.
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+  
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+    InsertedPHI->eraseFromParent();
+    return ConstVal;
+  }
+
+  // If the client wants to know about all new instructions, tell it.
+  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  
+  DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+  return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+  Instruction *User = cast<Instruction>(U.getUser());
+  BasicBlock *UseBB = User->getParent();
+  if (PHINode *UserPN = dyn_cast<PHINode>(User))
+    UseBB = UserPN->getIncomingBlock(U);
+  
+  U.set(GetValueInMiddleOfBlock(UseBB));
+}
+
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  
+  // Query AvailableVals by doing an insertion of null.
+  std::pair<AvailableValsTy::iterator, bool> InsertRes =
+  AvailableVals.insert(std::make_pair(BB, WeakVH()));
+  
+  // Handle the case when the insertion fails because we have already seen BB.
+  if (!InsertRes.second) {
+    // If the insertion failed, there are two cases.  The first case is that the
+    // value is already available for the specified block.  If we get this, just
+    // return the value.
+    if (InsertRes.first->second != 0)
+      return InsertRes.first->second;
+    
+    // Otherwise, if the value we find is null, then this is the value is not
+    // known but it is being computed elsewhere in our recursion.  This means
+    // that we have a cycle.  Handle this by inserting a PHI node and returning
+    // it.  When we get back to the first instance of the recursion we will fill
+    // in the PHI node.
+    return InsertRes.first->second =
+    PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+                    &BB->front());
+  }
+  
+  // Okay, the value isn't in the map and we just inserted a null in the entry
+  // to indicate that we're processing the block.  Since we have no idea what
+  // value is in this block, we have to recurse through our predecessors.
+  //
+  // While we're walking our predecessors, we keep track of them in a vector,
+  // then insert a PHI node in the end if we actually need one.  We could use a
+  // smallvector here, but that would take a lot of stack space for every level
+  // of the recursion, just use IncomingPredInfo as an explicit stack.
+  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+  unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+  
+  // As we're walking the predecessors, keep track of whether they are all
+  // producing the same value.  If so, this value will capture it, if not, it
+  // will get reset to null.  We distinguish the no-predecessor case explicitly
+  // below.
+  TrackingVH<Value> SingularValue;
+  
+  // We can get our predecessor info by walking the pred_iterator list, but it
+  // is relatively slow.  If we already have PHI nodes in this block, walk one
+  // of them to get the predecessor list instead.
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (i == 0)
+        SingularValue = PredVal;
+      else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  } else {
+    bool isFirstPred = true;
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *PredBB = *PI;
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (isFirstPred) {
+        SingularValue = PredVal;
+        isFirstPred = false;
+      } else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  }
+  
+  // If there are no predecessors, then we must have found an unreachable block
+  // just return 'undef'.  Since there are no predecessors, InsertRes must not
+  // be invalidated.
+  if (IncomingPredInfo.size() == FirstPredInfoEntry)
+    return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
+  
+  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
+  /// this block is involved in a loop, a no-entry PHI node will have been
+  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
+  /// above.
+  TrackingVH<Value> &InsertedVal = AvailableVals[BB];
+  
+  // If all the predecessor values are the same then we don't need to insert a
+  // PHI.  This is the simple and common case.
+  if (SingularValue) {
+    // If a PHI node got inserted, replace it with the singlar value and delete
+    // it.
+    if (InsertedVal) {
+      PHINode *OldVal = cast<PHINode>(InsertedVal);
+      // Be careful about dead loops.  These RAUW's also update InsertedVal.
+      if (InsertedVal != SingularValue)
+        OldVal->replaceAllUsesWith(SingularValue);
+      else
+        OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType()));
+      OldVal->eraseFromParent();
+    } else {
+      InsertedVal = SingularValue;
+    }
+    
+    // Drop the entries we added in IncomingPredInfo to restore the stack.
+    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                           IncomingPredInfo.end());
+    return InsertedVal;
+  }
+  
+  // Otherwise, we do need a PHI: insert one now if we don't already have one.
+  if (InsertedVal == 0)
+    InsertedVal = PHINode::Create(PrototypeValue->getType(),
+                                  PrototypeValue->getName(), &BB->front());
+  
+  PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
+  InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
+  
+  // Fill in all the predecessors of the PHI.
+  for (IncomingPredInfoTy::iterator I =
+         IncomingPredInfo.begin()+FirstPredInfoEntry,
+       E = IncomingPredInfo.end(); I != E; ++I)
+    InsertedPHI->addIncoming(I->second, I->first);
+  
+  // Drop the entries we added in IncomingPredInfo to restore the stack.
+  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                         IncomingPredInfo.end());
+  
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+    InsertedPHI->replaceAllUsesWith(ConstVal);
+    InsertedPHI->eraseFromParent();
+    InsertedVal = ConstVal;
+  } else {
+    DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+    
+    // If the client wants to know about all new instructions, tell it.
+    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  }
+  
+  return InsertedVal;
+}
+
+
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
index 4c4dd37..3bb2e8e 100644
--- a/lib/Transforms/Utils/SSI.cpp
+++ b/lib/Transforms/Utils/SSI.cpp
@@ -23,6 +23,7 @@
 
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/SSI.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 
 using namespace llvm;
@@ -30,11 +31,12 @@ using namespace llvm;
 static const std::string SSI_PHI = "SSI_phi";
 static const std::string SSI_SIG = "SSI_sigma";
 
-static const unsigned UNSIGNED_INFINITE = ~0U;
+STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
+STATISTIC(NumPhiInserted, "Number of phi functions inserted");
 
 void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<DominanceFrontier>();
-  AU.addRequired<DominatorTree>();
+  AU.addRequiredTransitive<DominanceFrontier>();
+  AU.addRequiredTransitive<DominatorTree>();
   AU.setPreservesAll();
 }
 
@@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) {
 
 /// This methods creates the SSI representation for the list of values
 /// received. It will only create SSI representation if a value is used
-/// in a to decide a branch. Repeated values are created only once.
+/// to decide a branch. Repeated values are created only once.
 ///
 void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
   init(value);
 
-  for (unsigned i = 0; i < num_values; ++i) {
-    if (created.insert(value[i])) {
-      needConstruction[i] = true;
-    }
-  }
-  insertSigmaFunctions(value);
+  SmallPtrSet<Instruction*, 4> needConstruction;
+  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I)
+    if (created.insert(*I))
+      needConstruction.insert(*I);
+
+  insertSigmaFunctions(needConstruction);
 
   // Test if there is a need to transform to SSI
-  if (needConstruction.any()) {
-    insertPhiFunctions(value);
-    renameInit(value);
+  if (!needConstruction.empty()) {
+    insertPhiFunctions(needConstruction);
+    renameInit(needConstruction);
     rename(DT_->getRoot());
     fixPhis();
   }
@@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
 /// Insert sigma functions (a sigma function is a phi function with one
 /// operator)
 ///
-void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) {
-  for (unsigned i = 0; i < num_values; ++i) {
-    if (!needConstruction[i])
-      continue;
-
-    bool need = false;
-    for (Value::use_iterator begin = value[i]->use_begin(), end =
-         value[i]->use_end(); begin != end; ++begin) {
+void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
+    for (Value::use_iterator begin = (*I)->use_begin(),
+         end = (*I)->use_end(); begin != end; ++begin) {
       // Test if the Use of the Value is in a comparator
-      CmpInst *CI = dyn_cast<CmpInst>(begin);
-      if (CI && isUsedInTerminator(CI)) {
-        // Basic Block of the Instruction
-        BasicBlock *BB = CI->getParent();
-        // Last Instruction of the Basic Block
-        const TerminatorInst *TI = BB->getTerminator();
-
-        for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) {
-          // Next Basic Block
-          BasicBlock *BB_next = TI->getSuccessor(j);
-          if (BB_next != BB &&
-              BB_next->getUniquePredecessor() != NULL &&
-              dominateAny(BB_next, value[i])) {
-            PHINode *PN = PHINode::Create(
-                value[i]->getType(), SSI_SIG, BB_next->begin());
-            PN->addIncoming(value[i], BB);
-            sigmas.insert(std::make_pair(PN, i));
-            created.insert(PN);
-            need = true;
-            defsites[i].push_back(BB_next);
+      if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
+        // Iterates through all uses of CmpInst
+        for (Value::use_iterator begin_ci = CI->use_begin(),
+             end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
+          // Test if any use of CmpInst is in a Terminator
+          if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
+            insertSigma(TI, *I);
           }
         }
       }
     }
-    needConstruction[i] = need;
+  }
+}
+
+/// Inserts Sigma Functions in every BasicBlock successor to Terminator
+/// Instruction TI. All inserted Sigma Function are related to Instruction I.
+///
+void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
+  // Basic Block of the Terminator Instruction
+  BasicBlock *BB = TI->getParent();
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+    // Next Basic Block
+    BasicBlock *BB_next = TI->getSuccessor(i);
+    if (BB_next != BB &&
+        BB_next->getSinglePredecessor() != NULL &&
+        dominateAny(BB_next, I)) {
+      PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
+      PN->addIncoming(I, BB);
+      sigmas[PN] = I;
+      created.insert(PN);
+      defsites[I].push_back(BB_next);
+      ++NumSigmaInserted;
+    }
   }
 }
 
 /// Insert phi functions when necessary
 ///
-void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) {
+void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
   DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
-  for (unsigned i = 0; i < num_values; ++i) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
     // Test if there were any sigmas for this variable
-    if (needConstruction[i]) {
-
-      SmallPtrSet<BasicBlock *, 1> BB_visited;
-
-      // Insert phi functions if there is any sigma function
-      while (!defsites[i].empty()) {
-
-        BasicBlock *BB = defsites[i].back();
-
-        defsites[i].pop_back();
-        DominanceFrontier::iterator DF_BB = DF->find(BB);
-
-        // Iterates through all the dominance frontier of BB
-        for (std::set<BasicBlock *>::iterator DF_BB_begin =
-             DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
-             DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
-          BasicBlock *BB_dominated = *DF_BB_begin;
-
-          // Test if has not yet visited this node and if the
-          // original definition dominates this node
-          if (BB_visited.insert(BB_dominated) &&
-              DT_->properlyDominates(value_original[i], BB_dominated) &&
-              dominateAny(BB_dominated, value[i])) {
-            PHINode *PN = PHINode::Create(
-                value[i]->getType(), SSI_PHI, BB_dominated->begin());
-            phis.insert(std::make_pair(PN, i));
-            created.insert(PN);
-
-            defsites[i].push_back(BB_dominated);
-          }
+    SmallPtrSet<BasicBlock *, 16> BB_visited;
+
+    // Insert phi functions if there is any sigma function
+    while (!defsites[*I].empty()) {
+
+      BasicBlock *BB = defsites[*I].back();
+
+      defsites[*I].pop_back();
+      DominanceFrontier::iterator DF_BB = DF->find(BB);
+
+      // The BB is unreachable. Skip it.
+      if (DF_BB == DF->end())
+        continue; 
+
+      // Iterates through all the dominance frontier of BB
+      for (std::set<BasicBlock *>::iterator DF_BB_begin =
+           DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
+           DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
+        BasicBlock *BB_dominated = *DF_BB_begin;
+
+        // Test if has not yet visited this node and if the
+        // original definition dominates this node
+        if (BB_visited.insert(BB_dominated) &&
+            DT_->properlyDominates(value_original[*I], BB_dominated) &&
+            dominateAny(BB_dominated, *I)) {
+          PHINode *PN = PHINode::Create(
+              (*I)->getType(), SSI_PHI, BB_dominated->begin());
+          phis.insert(std::make_pair(PN, *I));
+          created.insert(PN);
+
+          defsites[*I].push_back(BB_dominated);
+          ++NumPhiInserted;
         }
       }
-      BB_visited.clear();
     }
+    BB_visited.clear();
   }
 }
 
 /// Some initialization for the rename part
 ///
-void SSI::renameInit(SmallVectorImpl<Instruction *> &value) {
-  value_stack.resize(num_values);
-  for (unsigned i = 0; i < num_values; ++i) {
-    value_stack[i].push_back(value[i]);
-  }
+void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I)
+    value_stack[*I].push_back(*I);
 }
 
 /// Renames all variables in the specified BasicBlock.
 /// Only variables that need to be rename will be.
 ///
 void SSI::rename(BasicBlock *BB) {
-  BitVector *defined = new BitVector(num_values, false);
+  SmallPtrSet<Instruction*, 8> defined;
 
   // Iterate through instructions and make appropriate renaming.
   // For SSI_PHI (b = PHI()), store b at value_stack as a new
@@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) {
        begin != end; ++begin) {
     Instruction *I = begin;
     if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
-      int position;
+      Instruction* position;
 
       // Treat SSI_PHI
-      if ((position = getPositionPhi(PN)) != -1) {
+      if ((position = getPositionPhi(PN))) {
         value_stack[position].push_back(PN);
-        (*defined)[position] = true;
-      }
-
+        defined.insert(position);
       // Treat SSI_SIG
-      else if ((position = getPositionSigma(PN)) != -1) {
+      } else if ((position = getPositionSigma(PN))) {
         substituteUse(I);
         value_stack[position].push_back(PN);
-        (*defined)[position] = true;
+        defined.insert(position);
       }
 
       // Treat all other PHI functions
@@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) {
     for (BasicBlock::iterator begin = BB_succ->begin(),
          notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
       Instruction *I = begin;
-      PHINode *PN;
-      int position;
-      if ((PN = dyn_cast<PHINode>(I)) && ((position
-          = getPositionPhi(PN)) != -1)) {
+      PHINode *PN = dyn_cast<PHINode>(I);
+      Instruction* position;
+      if (PN && ((position = getPositionPhi(PN)))) {
         PN->addIncoming(value_stack[position].back(), BB);
       }
     }
@@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) {
 
   // Now we remove all inserted definitions of a variable from the top of
   // the stack leaving the previous one as the top.
-  if (defined->any()) {
-    for (unsigned i = 0; i < num_values; ++i) {
-      if ((*defined)[i]) {
-        value_stack[i].pop_back();
-      }
-    }
-  }
+  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
+       DE = defined.end(); DI != DE; ++DI)
+    value_stack[*DI].pop_back();
 }
 
 /// Substitute any use in this instruction for the last definition of
@@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) {
 void SSI::substituteUse(Instruction *I) {
   for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
     Value *operand = I->getOperand(i);
-    for (unsigned j = 0; j < num_values; ++j) {
-      if (operand == value_stack[j].front() &&
-          I != value_stack[j].back()) {
+    for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
+         VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
+      if (operand == VI->second.front() &&
+          I != VI->second.back()) {
         PHINode *PN_I = dyn_cast<PHINode>(I);
-        PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back());
+        PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
 
         // If a phi created in a BasicBlock is used as an operand of another
         // created in the same BasicBlock, this step marks this second phi,
         // to fix this issue later. It cannot be fixed now, because the
         // operands of the first phi are not final yet.
         if (PN_I && PN_vs &&
-            value_stack[j].back()->getParent() == I->getParent()) {
+            VI->second.back()->getParent() == I->getParent()) {
 
           phisToFix.insert(PN_I);
         }
 
-        I->setOperand(i, value_stack[j].back());
+        I->setOperand(i, VI->second.back());
         break;
       }
     }
@@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) {
 }
 
 /// Test if the BasicBlock BB dominates any use or definition of value.
+/// If it dominates a phi instruction that is on the same BasicBlock,
+/// that does not count.
 ///
 bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
   for (Value::use_iterator begin = value->use_begin(),
        end = value->use_end(); begin != end; ++begin) {
     Instruction *I = cast<Instruction>(*begin);
     BasicBlock *BB_father = I->getParent();
+    if (BB == BB_father && isa<PHINode>(I))
+      continue;
     if (DT_->dominates(BB, BB_father)) {
       return true;
     }
@@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
 /// as an operand of another phi function used in the same BasicBlock,
 /// LLVM looks this as an error. So on the second phi, the first phi is called
 /// P and the BasicBlock it incomes is B. This P will be replaced by the value
-/// it has for BasicBlock B.
+/// it has for BasicBlock B. It also includes undef values for predecessors
+/// that were not included in the phi.
 ///
 void SSI::fixPhis() {
   for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
        end = phisToFix.end(); begin != end; ++begin) {
     PHINode *PN = *begin;
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
-      PHINode *PN_father;
-      if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) &&
-          PN->getParent() == PN_father->getParent()) {
+      PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
+      if (PN_father && PN->getParent() == PN_father->getParent() &&
+          !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
         BasicBlock *BB = PN->getIncomingBlock(i);
         int pos = PN_father->getBasicBlockIndex(BB);
         PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
       }
     }
   }
+
+  for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
+       end = phis.end(); begin != end; ++begin) {
+    PHINode *PN = begin->first;
+    BasicBlock *BB = PN->getParent();
+    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+    SmallVector<BasicBlock*, 8> Preds(PI, PE);
+    for (unsigned size = Preds.size();
+         PI != PE && PN->getNumIncomingValues() != size; ++PI) {
+      bool found = false;
+      for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
+           i < pn_end; ++i) {
+        if (PN->getIncomingBlock(i) == *PI) {
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        PN->addIncoming(UndefValue::get(PN->getType()), *PI);
+      }
+    }
+  }
 }
 
 /// Return which variable (position on the vector of variables) this phi
 /// represents on the phis list.
 ///
-unsigned SSI::getPositionPhi(PHINode *PN) {
-  DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN);
+Instruction* SSI::getPositionPhi(PHINode *PN) {
+  DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
   if (val == phis.end())
-    return UNSIGNED_INFINITE;
+    return 0;
   else
     return val->second;
 }
@@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) {
 /// Return which variable (position on the vector of variables) this phi
 /// represents on the sigmas list.
 ///
-unsigned SSI::getPositionSigma(PHINode *PN) {
-  DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN);
+Instruction* SSI::getPositionSigma(PHINode *PN) {
+  DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
   if (val == sigmas.end())
-    return UNSIGNED_INFINITE;
+    return 0;
   else
     return val->second;
 }
 
-/// Return true if the the Comparison Instruction is an operator
-/// of the Terminator instruction of its Basic Block.
-///
-unsigned SSI::isUsedInTerminator(CmpInst *CI) {
-  TerminatorInst *TI = CI->getParent()->getTerminator();
-  if (TI->getNumOperands() == 0) {
-    return false;
-  } else if (CI == TI->getOperand(0)) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
 /// Initializes
 ///
 void SSI::init(SmallVectorImpl<Instruction *> &value) {
-  num_values = value.size();
-  needConstruction.resize(num_values, false);
-
-  value_original.resize(num_values);
-  defsites.resize(num_values);
-
-  for (unsigned i = 0; i < num_values; ++i) {
-    value_original[i] = value[i]->getParent();
-    defsites[i].push_back(value_original[i]);
+  for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
+    value_original[*I] = (*I)->getParent();
+    defsites[*I].push_back((*I)->getParent());
   }
 }
 
 /// Clean all used resources in this creation of SSI
 ///
 void SSI::clean() {
-  for (unsigned i = 0; i < num_values; ++i) {
-    defsites[i].clear();
-    if (i < value_stack.size())
-      value_stack[i].clear();
-  }
-
   phis.clear();
   sigmas.clear();
   phisToFix.clear();
@@ -378,7 +384,6 @@ void SSI::clean() {
   defsites.clear();
   value_stack.clear();
   value_original.clear();
-  needConstruction.clear();
 }
 
 /// createSSIPass - The public interface to this file...
@@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); }
 char SSI::ID = 0;
 static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
 
+/// SSIEverything - A pass that runs createSSI on every non-void variable,
+/// intended for debugging.
+namespace {
+  struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    SSIEverything() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<SSI>();
+    }
+  };
+}
+
+bool SSIEverything::runOnFunction(Function &F) {
+  SmallVector<Instruction *, 16> Insts;
+  SSI &ssi = getAnalysis<SSI>();
+
+  if (F.isDeclaration() || F.isIntrinsic()) return false;
+
+  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
+    for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
+      if (I->getType() != Type::getVoidTy(F.getContext()))
+        Insts.push_back(I);
+
+  ssi.createSSI(Insts);
+  return true;
+}
+
+/// createSSIEverythingPass - The public interface to this file...
+///
+FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
+
+char SSIEverything::ID = 0;
+static RegisterPass<SSIEverything>
+Y("ssi-everything", "Static Single Information Construction");
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 58d4d5a..6fd7d7b 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -21,6 +21,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/ADT/SmallVector.h"
@@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
 static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
   assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
 
-  DOUT << "Looking to fold " << BB->getNameStart() << " into " 
-       << Succ->getNameStart() << "\n";
+  DEBUG(errs() << "Looking to fold " << BB->getName() << " into " 
+        << Succ->getName() << "\n");
   // Shortcut, if there is only a single predecessor it must be BB and merging
   // is always safe
   if (Succ->getSinglePredecessor()) return true;
 
-  typedef SmallPtrSet<Instruction*, 16> InstrSet;
-  InstrSet BBPHIs;
-
-  // Make a list of all phi nodes in BB
-  BasicBlock::iterator BBI = BB->begin();
-  while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++);
-
   // Make a list of the predecessors of BB
   typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
   BlockSet BBPreds(pred_begin(BB), pred_end(BB));
@@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
             PI != PE; PI++) {
         if (BBPN->getIncomingValueForBlock(*PI) 
               != PN->getIncomingValueForBlock(*PI)) {
-          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-               << Succ->getNameStart() << " is conflicting with " 
-               << BBPN->getNameStart() << " with regard to common predecessor "
-               << (*PI)->getNameStart() << "\n";
+          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " 
+                << Succ->getName() << " is conflicting with " 
+                << BBPN->getName() << " with regard to common predecessor "
+                << (*PI)->getName() << "\n");
           return false;
         }
       }
-      // Remove this phinode from the list of phis in BB, since it has been
-      // handled.
-      BBPHIs.erase(BBPN);
     } else {
       Value* Val = PN->getIncomingValueForBlock(BB);
       for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
@@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
         // one for BB, in which case this phi node will not prevent the merging
         // of the block.
         if (Val != PN->getIncomingValueForBlock(*PI)) {
-          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-          << Succ->getNameStart() << " is conflicting with regard to common "
-          << "predecessor " << (*PI)->getNameStart() << "\n";
+          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " 
+                << Succ->getName() << " is conflicting with regard to common "
+                << "predecessor " << (*PI)->getName() << "\n");
           return false;
         }
       }
     }
   }
 
-  // If there are any other phi nodes in BB that don't have a phi node in Succ
-  // to merge with, they must be moved to Succ completely. However, for any
-  // predecessors of Succ, branches will be added to the phi node that just
-  // point to itself. So, for any common predecessors, this must not cause
-  // conflicts.
-  for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end();
-        I != E; I++) {
-    PHINode *PN = cast<PHINode>(*I);
-    for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
-          PI != PE; PI++)
-      if (PN->getIncomingValueForBlock(*PI) != PN) {
-        DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-             << BB->getNameStart() << " is conflicting with regard to common "
-             << "predecessor " << (*PI)->getNameStart() << "\n";
-        return false;
-      }
-  }
-
   return true;
 }
 
@@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
   // Check to see if merging these blocks would cause conflicts for any of the
   // phi nodes in BB or Succ. If not, we can safely merge.
   if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-  
-  DOUT << "Killing Trivial BB: \n" << *BB;
+
+  // Check for cases where Succ has multiple predecessors and a PHI node in BB
+  // has uses which will not disappear when the PHI nodes are merged.  It is
+  // possible to handle such cases, but difficult: it requires checking whether
+  // BB dominates Succ, which is non-trivial to calculate in the case where
+  // Succ has multiple predecessors.  Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't intoduce any
+  // conflicts; this isn't too difficult, but the previous code for doing this
+  // was incorrect.
+  //
+  // Note that if this check finds a live use, BB dominates Succ, so BB is
+  // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+  // folding the branch isn't profitable in that case anyway.
+  if (!Succ->getSinglePredecessor()) {
+    BasicBlock::iterator BBI = BB->begin();
+    while (isa<PHINode>(*BBI)) {
+      for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+           UI != E; ++UI) {
+        if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+          if (PN->getIncomingBlock(UI) != BB)
+            return false;
+        } else {
+          return false;
+        }
+      }
+      ++BBI;
+    }
+  }
+
+  DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
   
   if (isa<PHINode>(Succ->begin())) {
     // If there is more than one pred of succ, and there are PHI nodes in
@@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
     }
   }
   
-  if (isa<PHINode>(&BB->front())) {
-    SmallVector<BasicBlock*, 16>
-    OldSuccPreds(pred_begin(Succ), pred_end(Succ));
-    
-    // Move all PHI nodes in BB to Succ if they are alive, otherwise
-    // delete them.
-    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-      if (PN->use_empty()) {
-        // Just remove the dead phi.  This happens if Succ's PHIs were the only
-        // users of the PHI nodes.
-        PN->eraseFromParent();
-        continue;
-      }
-    
-      // The instruction is alive, so this means that BB must dominate all
-      // predecessors of Succ (Since all uses of the PN are after its
-      // definition, so in Succ or a block dominated by Succ. If a predecessor
-      // of Succ would not be dominated by BB, PN would violate the def before
-      // use SSA demand). Therefore, we can simply move the phi node to the
-      // next block.
+  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+    if (Succ->getSinglePredecessor()) {
+      // BB is the only predecessor of Succ, so Succ will end up with exactly
+      // the same predecessors BB had.
       Succ->getInstList().splice(Succ->begin(),
                                  BB->getInstList(), BB->begin());
-      
-      // We need to add new entries for the PHI node to account for
-      // predecessors of Succ that the PHI node does not take into
-      // account.  At this point, since we know that BB dominated succ and all
-      // of its predecessors, this means that we should any newly added
-      // incoming edges should use the PHI node itself as the value for these
-      // edges, because they are loop back edges.
-      for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i)
-        if (OldSuccPreds[i] != BB)
-          PN->addIncoming(PN, OldSuccPreds[i]);
+    } else {
+      // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+      assert(PN->use_empty() && "There shouldn't be any uses here!");
+      PN->eraseFromParent();
     }
   }
     
@@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
       // Okay, it looks like the instruction IS in the "condition".  Check to
       // see if its a cheap instruction to unconditionally compute, and if it
       // only uses stuff defined outside of the condition.  If so, hoist it out.
+      if (!I->isSafeToSpeculativelyExecute())
+        return false;
+
       switch (I->getOpcode()) {
       default: return false;  // Cannot hoist this out safely.
       case Instruction::Load: {
-        // We can hoist loads that are non-volatile and obviously cannot trap.
-        if (cast<LoadInst>(I)->isVolatile())
-          return false;
-        // FIXME: A computation of a constant can trap!
-        if (!isa<AllocaInst>(I->getOperand(0)) &&
-            !isa<Constant>(I->getOperand(0)))
-          return false;
-        // External weak globals may have address 0, so we can't load them.
-        Value *V2 = I->getOperand(0)->getUnderlyingObject();
-        if (V2) {
-          GlobalVariable* GV = dyn_cast<GlobalVariable>(V2);
-          if (GV && GV->hasExternalWeakLinkage())
-            return false;
-        }
-        // Finally, we have to check to make sure there are no instructions
-        // before the load in its basic block, as we are going to hoist the loop
-        // out to its predecessor.
+        // We have to check to make sure there are no instructions before the
+        // load in its basic block, as we are going to hoist the loop out to
+        // its predecessor.
         BasicBlock::iterator IP = PBB->begin();
         while (isa<DbgInfoIntrinsic>(IP))
           IP++;
@@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
         assert(ThisCases.size() == 1 && "Branch can only have one case!");
         // Insert the new branch.
         Instruction *NI = BranchInst::Create(ThisDef, TI);
+        (void) NI;
 
         // Remove PHI node entries for the dead edge.
         ThisCases[0].second->removePredecessor(TI->getParent());
 
-        DOUT << "Threading pred instr: " << *Pred->getTerminator()
-             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
 
         EraseTerminatorInstAndDCECond(TI);
         return true;
@@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
         for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
           DeadCases.insert(PredCases[i].first);
 
-        DOUT << "Threading pred instr: " << *Pred->getTerminator()
-             << "Through successor TI: " << *TI;
+        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+                     << "Through successor TI: " << *TI);
 
         for (unsigned i = SI->getNumCases()-1; i != 0; --i)
           if (DeadCases.count(SI->getCaseValue(i))) {
@@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
             SI->removeCase(i);
           }
 
-        DOUT << "Leaving: " << *TI << "\n";
+        DEBUG(errs() << "Leaving: " << *TI << "\n");
         return true;
       }
     }
@@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
 
     // Insert the new branch.
     Instruction *NI = BranchInst::Create(TheRealDest, TI);
+    (void) NI;
 
-    DOUT << "Threading pred instr: " << *Pred->getTerminator()
-         << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+    DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+              << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
 
     EraseTerminatorInstAndDCECond(TI);
     return true;
@@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
           if (InfLoopBlock == 0) {
             // Insert it at the end of the function, because it's either code,
             // or it won't matter if it's hot. :)
-            InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+            InfLoopBlock = BasicBlock::Create(BB->getContext(),
+                                              "infloop", BB->getParent());
             BranchInst::Create(InfLoopBlock, InfLoopBlock);
           }
           NewSI->setSuccessor(i, InfLoopBlock);
@@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
   while (isa<DbgInfoIntrinsic>(I2))
     I2 = BB2_Itr++;
   if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
-      !I1->isIdenticalTo(I2) ||
+      !I1->isIdenticalToWhenDefined(I2) ||
       (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
     return false;
 
@@ -919,6 +890,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
     if (!I2->use_empty())
       I2->replaceAllUsesWith(I1);
+    I1->intersectOptionalDataWith(I2);
     BB2->getInstList().erase(I2);
 
     I1 = BB1_Itr++;
@@ -927,7 +899,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     I2 = BB2_Itr++;
     while (isa<DbgInfoIntrinsic>(I2))
       I2 = BB2_Itr++;
-  } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2));
+  } while (I1->getOpcode() == I2->getOpcode() &&
+           I1->isIdenticalToWhenDefined(I2));
 
   return true;
 
@@ -939,7 +912,7 @@ HoistTerminator:
   // Okay, it is safe to hoist the terminator.
   Instruction *NT = I1->clone();
   BIParent->getInstList().insert(BI, NT);
-  if (NT->getType() != Type::VoidTy) {
+  if (NT->getType() != Type::getVoidTy(BB1->getContext())) {
     I1->replaceAllUsesWith(NT);
     I2->replaceAllUsesWith(NT);
     NT->takeName(I1);
@@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     ConstantInt *CB;
     if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
-        CB->getType() == Type::Int1Ty) {
+        CB->getType() == Type::getInt1Ty(BB->getContext())) {
       // Okay, we now know that all edges from PredBB should be revectored to
       // branch to RealDest.
       BasicBlock *PredBB = PN->getIncomingBlock(i);
@@ -1209,7 +1182,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
       // difficult cases.  Instead of being smart about this, just insert a new
       // block that jumps to the destination block, effectively splitting
       // the edge we are about to create.
-      BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge",
+      BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+                                              RealDest->getName()+".critedge",
                                               RealDest->getParent(), RealDest);
       BranchInst::Create(RealDest, EdgeBB);
       PHINode *PN;
@@ -1242,7 +1216,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
           }
           
           // Check for trivial simplification.
-          if (Constant *C = ConstantFoldInstruction(N)) {
+          if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
             TranslateMap[BBI] = C;
             delete N;   // Constant folded away, don't need actual inst
           } else {
@@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
     if (NumPhis > 2)
       return false;
   
-  DOUT << "FOUND IF CONDITION!  " << *IfCond << "  T: "
-       << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n";
+  DEBUG(errs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+        << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
   
   // Loop over the PHI's seeing if we can promote them all to select
   // instructions.  While we are at it, keep track of the instructions
@@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   if (FalseRet->getNumOperands() == 0) {
     TrueSucc->removePredecessor(BI->getParent());
     FalseSucc->removePredecessor(BI->getParent());
-    ReturnInst::Create(0, BI);
+    ReturnInst::Create(BI->getContext(), 0, BI);
     EraseTerminatorInstAndDCECond(BI);
     return true;
   }
@@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   }
 
   Value *RI = !TrueValue ?
-              ReturnInst::Create(BI) :
-              ReturnInst::Create(TrueValue, BI);
+              ReturnInst::Create(BI->getContext(), BI) :
+              ReturnInst::Create(BI->getContext(), TrueValue, BI);
+  (void) RI;
       
-  DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
-       << "\n  " << *BI << "NewRet = " << *RI
-       << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc;
+  DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+               << "\n  " << *BI << "NewRet = " << *RI
+               << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
       
   EraseTerminatorInstAndDCECond(BI);
 
@@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     else
       continue;
 
-    DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB;
+    DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
     
     // If we need to invert the condition in the pred block to match, do so now.
     if (InvertPredCond) {
@@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
 static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   assert(PBI->isConditional() && BI->isConditional());
   BasicBlock *BB = BI->getParent();
-  
+
   // If this block ends with a branch instruction, and if there is a
   // predecessor that ends on a branch of the same condition, make 
   // this conditional branch redundant.
@@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     if (BB->getSinglePredecessor()) {
       // Turn this into a branch on constant.
       bool CondIsTrue = PBI->getSuccessor(0) == BB;
-      BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue));
+      BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), 
+                                        CondIsTrue));
       return true;  // Nuke the branch on constant.
     }
     
@@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     // in the constant and simplify the block result.  Subsequent passes of
     // simplifycfg will thread the block.
     if (BlockIsSimpleEnoughToThreadThrough(BB)) {
-      PHINode *NewPN = PHINode::Create(Type::Int1Ty,
+      PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
                                        BI->getCondition()->getName() + ".pr",
                                        BB->begin());
       // Okay, we're going to insert the PHI node.  Since PBI is not the only
@@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
             PBI->getCondition() == BI->getCondition() &&
             PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
           bool CondIsTrue = PBI->getSuccessor(0) == BB;
-          NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, 
+          NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), 
                                               CondIsTrue), *PI);
         } else {
           NewPN->addIncoming(BI->getCondition(), *PI);
@@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   // Finally, if everything is ok, fold the branches to logical ops.
   BasicBlock *OtherDest  = BI->getSuccessor(BIOp ^ 1);
   
-  DOUT << "FOLDING BRs:" << *PBI->getParent()
-       << "AND: " << *BI->getParent();
+  DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent()
+               << "AND: " << *BI->getParent());
   
   
   // If OtherDest *is* BB, then BB is a basic block with a single conditional
@@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   if (OtherDest == BB) {
     // Insert it at the end of the function, because it's either code,
     // or it won't matter if it's hot. :)
-    BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+    BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+                                                  "infloop", BB->getParent());
     BranchInst::Create(InfLoopBlock, InfLoopBlock);
     OtherDest = InfLoopBlock;
   }  
   
-  DOUT << *PBI->getParent()->getParent();
+  DEBUG(errs() << *PBI->getParent()->getParent());
   
   // BI may have other predecessors.  Because of this, we leave
   // it alone, but modify PBI.
@@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     }
   }
   
-  DOUT << "INTO: " << *PBI->getParent();
-  
-  DOUT << *PBI->getParent()->getParent();
+  DEBUG(errs() << "INTO: " << *PBI->getParent());
+  DEBUG(errs() << *PBI->getParent()->getParent());
   
   // This basic block is probably dead.  We know it has at least
   // one fewer predecessor.
@@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
   // Remove basic blocks that have no predecessors... or that just have themself
   // as a predecessor.  These are unreachable.
   if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
-    DOUT << "Removing BB: \n" << *BB;
+    DEBUG(errs() << "Removing BB: \n" << *BB);
     DeleteDeadBlock(BB);
     return true;
   }
@@ -1832,8 +1808,8 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
       if (!UncondBranchPreds.empty()) {
         while (!UncondBranchPreds.empty()) {
           BasicBlock *Pred = UncondBranchPreds.pop_back_val();
-          DOUT << "FOLDING: " << *BB
-               << "INTO UNCOND BRANCH PRED: " << *Pred;
+          DEBUG(errs() << "FOLDING: " << *BB
+                       << "INTO UNCOND BRANCH PRED: " << *Pred);
           Instruction *UncondBranch = Pred->getTerminator();
           // Clone the return and add it to the end of the predecessor.
           Instruction *NewRet = RI->clone();
@@ -1884,33 +1860,26 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
   } else if (isa<UnwindInst>(BB->begin())) {
     // Check to see if the first instruction in this block is just an unwind.
     // If so, replace any invoke instructions which use this as an exception
-    // destination with call instructions, and any unconditional branch
-    // predecessor with an unwind.
+    // destination with call instructions.
     //
     SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
     while (!Preds.empty()) {
       BasicBlock *Pred = Preds.back();
-      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) {
-        if (BI->isUnconditional()) {
-          Pred->getInstList().pop_back();  // nuke uncond branch
-          new UnwindInst(Pred);            // Use unwind.
-          Changed = true;
-        }
-      } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
         if (II->getUnwindDest() == BB) {
           // Insert a new branch instruction before the invoke, because this
-          // is now a fall through...
+          // is now a fall through.
           BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
           Pred->getInstList().remove(II);   // Take out of symbol table
 
-          // Insert the call now...
+          // Insert the call now.
           SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
           CallInst *CI = CallInst::Create(II->getCalledValue(),
                                           Args.begin(), Args.end(),
                                           II->getName(), BI);
           CI->setCallingConv(II->getCallingConv());
           CI->setAttributes(II->getAttributes());
-          // If the invoke produced a value, the Call now does instead
+          // If the invoke produced a value, the Call now does instead.
           II->replaceAllUsesWith(CI);
           delete II;
           Changed = true;
@@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
         if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
           if (BI->isUnconditional()) {
             if (BI->getSuccessor(0) == BB) {
-              new UnreachableInst(TI);
+              new UnreachableInst(TI->getContext(), TI);
               TI->eraseFromParent();
               Changed = true;
             }
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 848f2b8..30cb94d 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   } else if (UnwindingBlocks.size() == 1) {
     UnwindBlock = UnwindingBlocks.front();
   } else {
-    UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F);
-    new UnwindInst(UnwindBlock);
+    UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+    new UnwindInst(F.getContext(), UnwindBlock);
 
     for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
            E = UnwindingBlocks.end(); I != E; ++I) {
@@ -83,8 +83,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   } else if (UnreachableBlocks.size() == 1) {
     UnreachableBlock = UnreachableBlocks.front();
   } else {
-    UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F);
-    new UnreachableInst(UnreachableBlock);
+    UnreachableBlock = BasicBlock::Create(F.getContext(), 
+                                          "UnifiedUnreachableBlock", &F);
+    new UnreachableInst(F.getContext(), UnreachableBlock);
 
     for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
            E = UnreachableBlocks.end(); I != E; ++I) {
@@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   // nodes (if the function returns values), and convert all of the return
   // instructions into unconditional branches.
   //
-  BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F);
+  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+                                               "UnifiedReturnBlock", &F);
 
   PHINode *PN = 0;
-  if (F.getReturnType() == Type::VoidTy) {
-    ReturnInst::Create(NULL, NewRetBlock);
+  if (F.getReturnType() == Type::getVoidTy(F.getContext())) {
+    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
   } else {
     // If the function doesn't return void... add a PHI node to the block...
     PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
     NewRetBlock->getInstList().push_back(PN);
-    ReturnInst::Create(PN, NewRetBlock);
+    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
   }
 
   // Loop over all of the blocks, replacing the return instruction with an
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp
index caef7ec..4d838b5 100644
--- a/lib/Transforms/Utils/UnrollLoop.cpp
+++ b/lib/Transforms/Utils/UnrollLoop.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
   if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
     return 0;
 
-  DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+  DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred);
 
   // Resolve any PHI nodes at the start of the block.  They are all
   // guaranteed to have exactly one entry if they exist, unless there are
@@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
   
   if (!BI || BI->isUnconditional()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
-    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
+    DEBUG(errs() <<
+             "  Can't unroll; loop not terminated by a conditional branch.\n");
     return false;
   }
 
@@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
     TripMultiple = L->getSmallConstantTripMultiple();
 
   if (TripCount != 0)
-    DOUT << "  Trip Count = " << TripCount << "\n";
+    DEBUG(errs() << "  Trip Count = " << TripCount << "\n");
   if (TripMultiple != 1)
-    DOUT << "  Trip Multiple = " << TripMultiple << "\n";
+    DEBUG(errs() << "  Trip Multiple = " << TripMultiple << "\n");
 
   // Effectively "DCE" unrolled iterations that are beyond the tripcount
   // and will never be executed.
@@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
   }
 
   if (CompletelyUnroll) {
-    DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
-         << " with trip count " << TripCount << "!\n";
+    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+          << " with trip count " << TripCount << "!\n");
   } else {
-    DOUT << "UNROLLING loop %" << Header->getName()
-         << " by " << Count;
+    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
+          << " by " << Count);
     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
-      DOUT << " with a breakout at trip " << BreakoutTrip;
+      DEBUG(errs() << " with a breakout at trip " << BreakoutTrip);
     } else if (TripMultiple != 1) {
-      DOUT << " with " << TripMultiple << " trips per branch";
+      DEBUG(errs() << " with " << TripMultiple << " trips per branch");
     }
-    DOUT << "!\n";
+    DEBUG(errs() << "!\n");
   }
 
   std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
@@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
 
       if (isInstructionTriviallyDead(Inst))
         (*BB)->getInstList().erase(Inst);
-      else if (Constant *C = ConstantFoldInstruction(Inst)) {
+      else if (Constant *C = ConstantFoldInstruction(Inst, 
+                                                     Header->getContext())) {
         Inst->replaceAllUsesWith(C);
         (*BB)->getInstList().erase(Inst);
       }
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 20b676d..2d8332f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,23 +13,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h"  // For getNullValue(Type::Int32Ty)
 #include "llvm/Constants.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Instruction.h"
-#include "llvm/MDNode.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
-Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
   Value *&VMSlot = VM[V];
   if (VMSlot) return VMSlot;      // Does it exist in the map yet?
   
   // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
   // DenseMap.  This includes any recursive calls to MapValue.
 
-  // Global values do not need to be seeded into the ValueMap if they are using
-  // the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+  // Global values and metadata do not need to be seeded into the ValueMap if 
+  // they are using the identity mapping.
+  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V))
     return VMSlot = const_cast<Value*>(V);
 
   if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
@@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
       for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This array must contain a reference to a global, make a new array
           // and return it.
@@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantArray::get(CA->getType(), Values);
         }
       }
@@ -60,7 +64,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
       for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This struct must contain a reference to a global, make a new struct
           // and return it.
@@ -71,7 +75,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantStruct::get(CS->getType(), Values);
         }
       }
@@ -80,12 +84,12 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       std::vector<Constant*> Ops;
       for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
-        Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+        Ops.push_back(cast<Constant>(MapValue(*i, VM, Context)));
       return VM[V] = CE->getWithOperands(Ops);
     } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
       for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This vector value must contain a reference to a global, make a new
           // vector constant and return it.
@@ -96,38 +100,16 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantVector::get(Values);
         }
       }
       return VM[V] = C;
       
-    } else if (MDNode *N = dyn_cast<MDNode>(C)) {
-      for (MDNode::const_elem_iterator b = N->elem_begin(), i = b,
-             e = N->elem_end(); i != e; ++i) {
-        if (!*i) continue;
-
-        Value *MV = MapValue(*i, VM);
-        if (MV != *i) {
-          // This MDNode must contain a reference to a global, make a new MDNode
-          // and return it.
-	  SmallVector<Value*, 8> Values;
-          Values.reserve(N->getNumElements());
-          for (MDNode::const_elem_iterator j = b; j != i; ++j)
-            Values.push_back(*j);
-          Values.push_back(MV);
-          for (++i; i != e; ++i)
-            Values.push_back(MapValue(*i, VM));
-          return VM[V] = MDNode::get(Values.data(), Values.size());
-        }
-      }
-      return VM[V] = C;
-
     } else {
-      assert(0 && "Unknown type of constant!");
+      llvm_unreachable("Unknown type of constant!");
     }
   }
-
   return 0;
 }
 
@@ -136,7 +118,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
 ///
 void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, ValueMap);
+    Value *V = MapValue(*op, ValueMap, I->getParent()->getContext());
     assert(V && "Referenced value not in value map!");
     *op = V;
   }